Mirror of https://github.com/torvalds/linux.git — synced 2025-11-04 10:40:15 +02:00
			
		
		
		
	perf events: Clean up pid passing
The kernel perf event creation path shouldn't use find_task_by_vpid() because a vpid exists in a specific namespace. find_task_by_vpid() uses current's pid namespace, which isn't always the correct namespace to use for the vpid in all the places perf_event_create_kernel_counter() (and thus find_get_context()) is called.

The goal is to clean up pid namespace handling and prevent bugs like:

	https://bugzilla.kernel.org/show_bug.cgi?id=17281

Instead of using pids, switch find_get_context() to use task struct pointers directly. The syscall is responsible for resolving the pid to a task struct. This moves the pid namespace resolution into the syscall, much like every other syscall that takes pid parameters.

Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robin Green <greenrd@greenrd.org>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
LKML-Reference: <a134e5e392ab0204961fd1a62c84a222bf5874a9.1284407763.git.matthltc@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
		
							parent
							
								
									2ebd4ffb6d
								
							
						
					
					
						commit
						38a81da220
					
				
					 5 changed files with 15 additions and 17 deletions
				
			
		| 
						 | 
					@ -96,7 +96,7 @@ static int op_create_counter(int cpu, int event)
 | 
				
			||||||
		return ret;
 | 
							return ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
 | 
						pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
 | 
				
			||||||
						  cpu, -1,
 | 
											  cpu, NULL,
 | 
				
			||||||
						  op_overflow_handler);
 | 
											  op_overflow_handler);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (IS_ERR(pevent)) {
 | 
						if (IS_ERR(pevent)) {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -902,7 +902,7 @@ extern int perf_event_release_kernel(struct perf_event *event);
 | 
				
			||||||
extern struct perf_event *
 | 
					extern struct perf_event *
 | 
				
			||||||
perf_event_create_kernel_counter(struct perf_event_attr *attr,
 | 
					perf_event_create_kernel_counter(struct perf_event_attr *attr,
 | 
				
			||||||
				int cpu,
 | 
									int cpu,
 | 
				
			||||||
				pid_t pid,
 | 
									struct task_struct *task,
 | 
				
			||||||
				perf_overflow_handler_t callback);
 | 
									perf_overflow_handler_t callback);
 | 
				
			||||||
extern u64 perf_event_read_value(struct perf_event *event,
 | 
					extern u64 perf_event_read_value(struct perf_event *event,
 | 
				
			||||||
				 u64 *enabled, u64 *running);
 | 
									 u64 *enabled, u64 *running);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -433,8 +433,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
 | 
				
			||||||
			    perf_overflow_handler_t triggered,
 | 
								    perf_overflow_handler_t triggered,
 | 
				
			||||||
			    struct task_struct *tsk)
 | 
								    struct task_struct *tsk)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return perf_event_create_kernel_counter(attr, -1, task_pid_vnr(tsk),
 | 
						return perf_event_create_kernel_counter(attr, -1, tsk, triggered);
 | 
				
			||||||
						triggered);
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 | 
					EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -516,7 +515,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
 | 
				
			||||||
	get_online_cpus();
 | 
						get_online_cpus();
 | 
				
			||||||
	for_each_online_cpu(cpu) {
 | 
						for_each_online_cpu(cpu) {
 | 
				
			||||||
		pevent = per_cpu_ptr(cpu_events, cpu);
 | 
							pevent = per_cpu_ptr(cpu_events, cpu);
 | 
				
			||||||
		bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);
 | 
							bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		*pevent = bp;
 | 
							*pevent = bp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2053,15 +2053,14 @@ find_lively_task_by_vpid(pid_t vpid)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct perf_event_context *
 | 
					static struct perf_event_context *
 | 
				
			||||||
find_get_context(struct pmu *pmu, pid_t pid, int cpu)
 | 
					find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct perf_event_context *ctx;
 | 
						struct perf_event_context *ctx;
 | 
				
			||||||
	struct perf_cpu_context *cpuctx;
 | 
						struct perf_cpu_context *cpuctx;
 | 
				
			||||||
	struct task_struct *task;
 | 
					 | 
				
			||||||
	unsigned long flags;
 | 
						unsigned long flags;
 | 
				
			||||||
	int ctxn, err;
 | 
						int ctxn, err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (pid == -1 && cpu != -1) {
 | 
						if (!task && cpu != -1) {
 | 
				
			||||||
		/* Must be root to operate on a CPU event: */
 | 
							/* Must be root to operate on a CPU event: */
 | 
				
			||||||
		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 | 
							if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 | 
				
			||||||
			return ERR_PTR(-EACCES);
 | 
								return ERR_PTR(-EACCES);
 | 
				
			||||||
| 
						 | 
					@ -2084,10 +2083,6 @@ find_get_context(struct pmu *pmu, pid_t pid, int cpu)
 | 
				
			||||||
		return ctx;
 | 
							return ctx;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	task = find_lively_task_by_vpid(pid);
 | 
					 | 
				
			||||||
	if (IS_ERR(task))
 | 
					 | 
				
			||||||
		return (void*)task;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	err = -EINVAL;
 | 
						err = -EINVAL;
 | 
				
			||||||
	ctxn = pmu->task_ctx_nr;
 | 
						ctxn = pmu->task_ctx_nr;
 | 
				
			||||||
	if (ctxn < 0)
 | 
						if (ctxn < 0)
 | 
				
			||||||
| 
						 | 
					@ -5527,6 +5522,7 @@ SYSCALL_DEFINE5(perf_event_open,
 | 
				
			||||||
	struct perf_event_context *ctx;
 | 
						struct perf_event_context *ctx;
 | 
				
			||||||
	struct file *event_file = NULL;
 | 
						struct file *event_file = NULL;
 | 
				
			||||||
	struct file *group_file = NULL;
 | 
						struct file *group_file = NULL;
 | 
				
			||||||
 | 
						struct task_struct *task = NULL;
 | 
				
			||||||
	struct pmu *pmu;
 | 
						struct pmu *pmu;
 | 
				
			||||||
	int event_fd;
 | 
						int event_fd;
 | 
				
			||||||
	int fput_needed = 0;
 | 
						int fput_needed = 0;
 | 
				
			||||||
| 
						 | 
					@ -5581,10 +5577,13 @@ SYSCALL_DEFINE5(perf_event_open,
 | 
				
			||||||
	if ((pmu->task_ctx_nr == perf_sw_context) && group_leader)
 | 
						if ((pmu->task_ctx_nr == perf_sw_context) && group_leader)
 | 
				
			||||||
		pmu = group_leader->pmu;
 | 
							pmu = group_leader->pmu;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (pid != -1)
 | 
				
			||||||
 | 
							task = find_lively_task_by_vpid(pid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Get the target context (task or percpu):
 | 
						 * Get the target context (task or percpu):
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	ctx = find_get_context(pmu, pid, cpu);
 | 
						ctx = find_get_context(pmu, task, cpu);
 | 
				
			||||||
	if (IS_ERR(ctx)) {
 | 
						if (IS_ERR(ctx)) {
 | 
				
			||||||
		err = PTR_ERR(ctx);
 | 
							err = PTR_ERR(ctx);
 | 
				
			||||||
		goto err_group_fd;
 | 
							goto err_group_fd;
 | 
				
			||||||
| 
						 | 
					@ -5666,11 +5665,11 @@ SYSCALL_DEFINE5(perf_event_open,
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * @attr: attributes of the counter to create
 | 
					 * @attr: attributes of the counter to create
 | 
				
			||||||
 * @cpu: cpu in which the counter is bound
 | 
					 * @cpu: cpu in which the counter is bound
 | 
				
			||||||
 * @pid: task to profile
 | 
					 * @task: task to profile (NULL for percpu)
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
struct perf_event *
 | 
					struct perf_event *
 | 
				
			||||||
perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 | 
					perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 | 
				
			||||||
				 pid_t pid,
 | 
									 struct task_struct *task,
 | 
				
			||||||
				 perf_overflow_handler_t overflow_handler)
 | 
									 perf_overflow_handler_t overflow_handler)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct perf_event_context *ctx;
 | 
						struct perf_event_context *ctx;
 | 
				
			||||||
| 
						 | 
					@ -5687,7 +5686,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 | 
				
			||||||
		goto err;
 | 
							goto err;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ctx = find_get_context(event->pmu, pid, cpu);
 | 
						ctx = find_get_context(event->pmu, task, cpu);
 | 
				
			||||||
	if (IS_ERR(ctx)) {
 | 
						if (IS_ERR(ctx)) {
 | 
				
			||||||
		err = PTR_ERR(ctx);
 | 
							err = PTR_ERR(ctx);
 | 
				
			||||||
		goto err_free;
 | 
							goto err_free;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -358,7 +358,7 @@ static int watchdog_nmi_enable(int cpu)
 | 
				
			||||||
	/* Try to register using hardware perf events */
 | 
						/* Try to register using hardware perf events */
 | 
				
			||||||
	wd_attr = &wd_hw_attr;
 | 
						wd_attr = &wd_hw_attr;
 | 
				
			||||||
	wd_attr->sample_period = hw_nmi_get_sample_period();
 | 
						wd_attr->sample_period = hw_nmi_get_sample_period();
 | 
				
			||||||
	event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback);
 | 
						event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback);
 | 
				
			||||||
	if (!IS_ERR(event)) {
 | 
						if (!IS_ERR(event)) {
 | 
				
			||||||
		printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
 | 
							printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
 | 
				
			||||||
		goto out_save;
 | 
							goto out_save;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue