Mirror of https://github.com/torvalds/linux.git (synced 2025-11-04 02:30:34 +02:00)
	perf: Rework perf_event_exit_event()
Make perf_event_exit_event() more robust, such that we can use it from other contexts. Specifically, the upcoming remove_on_exec.

For this to work we need to address a few issues. remove_on_exec will not destroy the entire context, so we cannot rely on TASK_TOMBSTONE to disable event_function_call() and we thus have to use perf_remove_from_context().

When using perf_remove_from_context(), there are two races to consider. The first is against close(), where we can have concurrent tear-down of the event. The second is against child_list iteration, which should not find a half-baked event.

To address this, teach perf_remove_from_context() to special-case !ctx->is_active and about DETACH_CHILD.

[ elver@google.com: fix racing parent/child exit in sync_child_event(). ]

Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210408103605.1676875-2-elver@google.com
This commit is contained in:
parent 874fc35cdd
commit ef54c1a476
2 changed files with 83 additions and 66 deletions
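The objects whose teardown this patch reworks are inherited "child" events: when a counter is opened with perf_event_attr.inherit set, the kernel clones it into every child the measured task forks, and when such a child exits, perf_event_exit_event() detaches the clone and sync_child_event() folds its count back into the parent event. For orientation before the diff below, here is a minimal userspace sketch of that scenario; it is illustrative only (not part of the patch), assumes a Linux system with perf events available, and trims most error handling.

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

/* thin wrapper; glibc does not provide one */
static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count = 0;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.disabled = 1;
	attr.inherit = 1;	/* fork()ed children get cloned "child events" */
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;

	fd = perf_event_open(&attr, 0 /* this task */, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);

	if (fork() == 0) {
		/*
		 * The child runs with an inherited child event. When it
		 * exits, the kernel tears that event down through
		 * perf_event_exit_event() and adds its count to the
		 * parent event (sync_child_event()).
		 */
		volatile unsigned long i;

		for (i = 0; i < 1000000; i++)
			;
		_exit(0);
	}
	wait(NULL);

	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	if (read(fd, &count, sizeof(count)) != sizeof(count))
		perror("read");
	printf("instructions (parent + exited child): %llu\n",
	       (unsigned long long)count);
	close(fd);
	return 0;
}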
				
			
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -607,6 +607,7 @@ struct swevent_hlist {
 #define PERF_ATTACH_TASK_DATA	0x08
 #define PERF_ATTACH_ITRACE	0x10
 #define PERF_ATTACH_SCHED_CB	0x20
+#define PERF_ATTACH_CHILD	0x40
 
 struct perf_cgroup;
 struct perf_buffer;

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2205,6 +2205,26 @@ static void perf_group_detach(struct perf_event *event)
 	perf_event__header_size(leader);
 }
 
+static void sync_child_event(struct perf_event *child_event);
+
+static void perf_child_detach(struct perf_event *event)
+{
+	struct perf_event *parent_event = event->parent;
+
+	if (!(event->attach_state & PERF_ATTACH_CHILD))
+		return;
+
+	event->attach_state &= ~PERF_ATTACH_CHILD;
+
+	if (WARN_ON_ONCE(!parent_event))
+		return;
+
+	lockdep_assert_held(&parent_event->child_mutex);
+
+	sync_child_event(event);
+	list_del_init(&event->child_list);
+}
+
 static bool is_orphaned_event(struct perf_event *event)
 {
 	return event->state == PERF_EVENT_STATE_DEAD;
@@ -2312,6 +2332,7 @@ group_sched_out(struct perf_event *group_event,
 }
 
 #define DETACH_GROUP	0x01UL
+#define DETACH_CHILD	0x02UL
 
 /*
  * Cross CPU call to remove a performance event
@@ -2335,6 +2356,8 @@ __perf_remove_from_context(struct perf_event *event,
 	event_sched_out(event, cpuctx, ctx);
 	if (flags & DETACH_GROUP)
 		perf_group_detach(event);
+	if (flags & DETACH_CHILD)
+		perf_child_detach(event);
 	list_del_event(event, ctx);
 
 	if (!ctx->nr_events && ctx->is_active) {
@@ -2363,25 +2386,21 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
 
 	lockdep_assert_held(&ctx->mutex);
 
-	event_function_call(event, __perf_remove_from_context, (void *)flags);
-
 	/*
-	 * The above event_function_call() can NO-OP when it hits
-	 * TASK_TOMBSTONE. In that case we must already have been detached
-	 * from the context (by perf_event_exit_event()) but the grouping
-	 * might still be in-tact.
+	 * Because of perf_event_exit_task(), perf_remove_from_context() ought
+	 * to work in the face of TASK_TOMBSTONE, unlike every other
+	 * event_function_call() user.
 	 */
-	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
-	if ((flags & DETACH_GROUP) &&
-	    (event->attach_state & PERF_ATTACH_GROUP)) {
-		/*
-		 * Since in that case we cannot possibly be scheduled, simply
-		 * detach now.
-		 */
-		raw_spin_lock_irq(&ctx->lock);
-		perf_group_detach(event);
+	raw_spin_lock_irq(&ctx->lock);
+	if (!ctx->is_active) {
+		__perf_remove_from_context(event, __get_cpu_context(ctx),
+					   ctx, (void *)flags);
 		raw_spin_unlock_irq(&ctx->lock);
+		return;
 	}
+	raw_spin_unlock_irq(&ctx->lock);
+
+	event_function_call(event, __perf_remove_from_context, (void *)flags);
 }
 
 /*
@@ -12377,14 +12396,17 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 }
 EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
 
-static void sync_child_event(struct perf_event *child_event,
-			       struct task_struct *child)
+static void sync_child_event(struct perf_event *child_event)
 {
 	struct perf_event *parent_event = child_event->parent;
 	u64 child_val;
 
-	if (child_event->attr.inherit_stat)
-		perf_event_read_event(child_event, child);
+	if (child_event->attr.inherit_stat) {
+		struct task_struct *task = child_event->ctx->task;
+
+		if (task && task != TASK_TOMBSTONE)
+			perf_event_read_event(child_event, task);
+	}
 
 	child_val = perf_event_count(child_event);
 
@@ -12399,60 +12421,53 @@ static void sync_child_event(struct perf_event *child_event,
 }
 
 static void
-perf_event_exit_event(struct perf_event *child_event,
-		      struct perf_event_context *child_ctx,
-		      struct task_struct *child)
+perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
 {
-	struct perf_event *parent_event = child_event->parent;
+	struct perf_event *parent_event = event->parent;
+	unsigned long detach_flags = 0;
+
+	if (parent_event) {
+		/*
+		 * Do not destroy the 'original' grouping; because of the
+		 * context switch optimization the original events could've
+		 * ended up in a random child task.
+		 *
+		 * If we were to destroy the original group, all group related
+		 * operations would cease to function properly after this
+		 * random child dies.
+		 *
+		 * Do destroy all inherited groups, we don't care about those
+		 * and being thorough is better.
+		 */
+		detach_flags = DETACH_GROUP | DETACH_CHILD;
+		mutex_lock(&parent_event->child_mutex);
+	}
+
+	perf_remove_from_context(event, detach_flags);
+
+	raw_spin_lock_irq(&ctx->lock);
+	if (event->state > PERF_EVENT_STATE_EXIT)
+		perf_event_set_state(event, PERF_EVENT_STATE_EXIT);
+	raw_spin_unlock_irq(&ctx->lock);
 
 	/*
-	 * Do not destroy the 'original' grouping; because of the context
-	 * switch optimization the original events could've ended up in a
-	 * random child task.
-	 *
-	 * If we were to destroy the original group, all group related
-	 * operations would cease to function properly after this random
-	 * child dies.
-	 *
-	 * Do destroy all inherited groups, we don't care about those
-	 * and being thorough is better.
+	 * Child events can be freed.
	 */
-	raw_spin_lock_irq(&child_ctx->lock);
-	WARN_ON_ONCE(child_ctx->is_active);
-
-	if (parent_event)
-		perf_group_detach(child_event);
-	list_del_event(child_event, child_ctx);
-	perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT); /* is_event_hup() */
-	raw_spin_unlock_irq(&child_ctx->lock);
+	if (parent_event) {
+		mutex_unlock(&parent_event->child_mutex);
+		/*
+		 * Kick perf_poll() for is_event_hup();
+		 */
+		perf_event_wakeup(parent_event);
+		free_event(event);
+		put_event(parent_event);
+		return;
+	}
 
 	/*
 	 * Parent events are governed by their filedesc, retain them.
 	 */
-	if (!parent_event) {
-		perf_event_wakeup(child_event);
-		return;
-	}
-	/*
-	 * Child events can be cleaned up.
-	 */
-
-	sync_child_event(child_event, child);
-
-	/*
-	 * Remove this event from the parent's list
-	 */
-	WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-	mutex_lock(&parent_event->child_mutex);
-	list_del_init(&child_event->child_list);
-	mutex_unlock(&parent_event->child_mutex);
-
-	/*
-	 * Kick perf_poll() for is_event_hup().
-	 */
-	perf_event_wakeup(parent_event);
-	free_event(child_event);
-	put_event(parent_event);
+	perf_event_wakeup(event);
 }
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
@@ -12509,7 +12524,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	perf_event_task(child, child_ctx, 0);
 
 	list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
-		perf_event_exit_event(child_event, child_ctx, child);
+		perf_event_exit_event(child_event, child_ctx);
 
 	mutex_unlock(&child_ctx->mutex);
 
@@ -12769,6 +12784,7 @@ inherit_event(struct perf_event *parent_event,
 	 */
 	raw_spin_lock_irqsave(&child_ctx->lock, flags);
 	add_event_to_ctx(child_event, child_ctx);
+	child_event->attach_state |= PERF_ATTACH_CHILD;
 	raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
 
 	/*