	perf: Make perf_pmu_unregister() useable

Previously it was only safe to call perf_pmu_unregister() if there
were no active events of that pmu around -- which was impossible to
guarantee since it races all sorts against perf_init_event().

Rework the whole thing by:

 - keeping track of all events for a given pmu

 - 'hiding' the pmu from perf_init_event()

 - waiting for the appropriate (s)rcu grace periods such that all
   prior references to the PMU will be completed

 - detaching all still existing events of that pmu (see first point)
   and moving them to a new REVOKED state.

 - actually freeing the pmu data.

Where notably the new REVOKED state must inhibit all event actions from
reaching code that wants to use event->pmu.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lkml.kernel.org/r/20250307193723.525402029@infradead.org

This commit is contained in:
parent 4da0600eda
commit da916e96e2

2 changed files with 280 additions and 55 deletions
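
The diff below also changes the perf_pmu_unregister() prototype from void to
int, so a caller can now observe failure: -EINVAL if the PMU was already gone
from the idr, or -EBUSY when events are still around and the PMU uses
::event_unmapped() in perf_mmap_close(). A minimal caller-side sketch of that
new contract; the driver and its struct pmu named example_pmu are hypothetical
and not part of this patch:

	/* Hypothetical module teardown; "example_pmu" is a made-up name. */
	static struct pmu example_pmu;	/* registered earlier via perf_pmu_register() */

	static void example_pmu_exit(void)
	{
		int ret = perf_pmu_unregister(&example_pmu);

		/* e.g. -EBUSY: live events and ::event_unmapped() in use */
		if (ret)
			pr_warn("example_pmu: unregister failed (%d)\n", ret);
	}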
				
			
include/linux/perf_event.h

@@ -325,6 +325,9 @@ struct perf_output_handle;
 struct pmu {
 	struct list_head		entry;
 
+	spinlock_t			events_lock;
+	struct list_head		events;
+
 	struct module			*module;
 	struct device			*dev;
 	struct device			*parent;
@@ -622,9 +625,10 @@ struct perf_addr_filter_range {
  * enum perf_event_state - the states of an event:
  */
 enum perf_event_state {
-	PERF_EVENT_STATE_DEAD		= -4,
-	PERF_EVENT_STATE_EXIT		= -3,
-	PERF_EVENT_STATE_ERROR		= -2,
+	PERF_EVENT_STATE_DEAD		= -5,
+	PERF_EVENT_STATE_REVOKED	= -4, /* pmu gone, must not touch */
+	PERF_EVENT_STATE_EXIT		= -3, /* task died, still inherit */
+	PERF_EVENT_STATE_ERROR		= -2, /* scheduling error, can enable */
 	PERF_EVENT_STATE_OFF		= -1,
 	PERF_EVENT_STATE_INACTIVE	=  0,
 	PERF_EVENT_STATE_ACTIVE		=  1,
@@ -865,6 +869,7 @@ struct perf_event {
 	void *security;
 #endif
 	struct list_head		sb_list;
+	struct list_head		pmu_list;
 
 	/*
 	 * Certain events gets forwarded to another pmu internally by over-
@@ -1155,7 +1160,7 @@ extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags);
 extern void perf_event_itrace_started(struct perf_event *event);
 
 extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
-extern void perf_pmu_unregister(struct pmu *pmu);
+extern int perf_pmu_unregister(struct pmu *pmu);
 
 extern void __perf_event_task_sched_in(struct task_struct *prev,
 				       struct task_struct *task);
@@ -1760,7 +1765,7 @@ static inline bool needs_branch_stack(struct perf_event *event)
 
 static inline bool has_aux(struct perf_event *event)
 {
-	return event->pmu->setup_aux;
+	return event->pmu && event->pmu->setup_aux;
 }
 
 static inline bool has_aux_action(struct perf_event *event)

kernel/events/core.c

@@ -208,6 +208,7 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
 }
 
 #define TASK_TOMBSTONE ((void *)-1L)
+#define EVENT_TOMBSTONE ((void *)-1L)
 
 static bool is_kernel_event(struct perf_event *event)
 {
@@ -2336,6 +2337,11 @@ static void perf_child_detach(struct perf_event *event)
 
 	sync_child_event(event);
 	list_del_init(&event->child_list);
+	/*
+	 * Cannot set to NULL, as that would confuse the situation vs
+	 * not being a child event. See for example unaccount_event().
+	 */
+	event->parent = EVENT_TOMBSTONE;
 }
 
 static bool is_orphaned_event(struct perf_event *event)
@@ -2457,8 +2463,9 @@ ctx_time_update_event(struct perf_event_context *ctx, struct perf_event *event)
 
 #define DETACH_GROUP	0x01UL
 #define DETACH_CHILD	0x02UL
-#define DETACH_DEAD	0x04UL
-#define DETACH_EXIT	0x08UL
+#define DETACH_EXIT	0x04UL
+#define DETACH_REVOKE	0x08UL
+#define DETACH_DEAD	0x10UL
 
 /*
  * Cross CPU call to remove a performance event
@@ -2484,18 +2491,21 @@ __perf_remove_from_context(struct perf_event *event,
 	 */
 	if (flags & DETACH_EXIT)
 		state = PERF_EVENT_STATE_EXIT;
+	if (flags & DETACH_REVOKE)
+		state = PERF_EVENT_STATE_REVOKED;
 	if (flags & DETACH_DEAD) {
 		event->pending_disable = 1;
 		state = PERF_EVENT_STATE_DEAD;
 	}
 	event_sched_out(event, ctx);
-	perf_event_set_state(event, min(event->state, state));
 	if (flags & DETACH_GROUP)
 		perf_group_detach(event);
 	if (flags & DETACH_CHILD)
 		perf_child_detach(event);
 	list_del_event(event, ctx);
 
+	event->state = min(event->state, state);
+
 	if (!pmu_ctx->nr_events) {
 		pmu_ctx->rotate_necessary = 0;
 
@@ -4523,7 +4533,8 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
 
 static void perf_remove_from_owner(struct perf_event *event);
 static void perf_event_exit_event(struct perf_event *event,
-				  struct perf_event_context *ctx);
+				  struct perf_event_context *ctx,
+				  bool revoke);
 
 /*
  * Removes all events from the current task that have been marked
@@ -4550,7 +4561,7 @@ static void perf_event_remove_on_exec(struct perf_event_context *ctx)
 
 		modified = true;
 
-		perf_event_exit_event(event, ctx);
+		perf_event_exit_event(event, ctx, false);
 	}
 
 	raw_spin_lock_irqsave(&ctx->lock, flags);
@@ -5132,6 +5143,7 @@ static bool is_sb_event(struct perf_event *event)
 	    attr->context_switch || attr->text_poke ||
 	    attr->bpf_event)
 		return true;
+
 	return false;
 }
 
@@ -5528,6 +5540,8 @@ static void perf_free_addr_filters(struct perf_event *event);
 /* vs perf_event_alloc() error */
 static void __free_event(struct perf_event *event)
 {
+	struct pmu *pmu = event->pmu;
+
 	if (event->attach_state & PERF_ATTACH_CALLCHAIN)
 		put_callchain_buffers();
 
@@ -5557,6 +5571,7 @@ static void __free_event(struct perf_event *event)
 		 * put_pmu_ctx() needs an event->ctx reference, because of
 		 * epc->ctx.
 		 */
+		WARN_ON_ONCE(!pmu);
 		WARN_ON_ONCE(!event->ctx);
 		WARN_ON_ONCE(event->pmu_ctx->ctx != event->ctx);
 		put_pmu_ctx(event->pmu_ctx);
@@ -5569,8 +5584,13 @@ static void __free_event(struct perf_event *event)
 	if (event->ctx)
 		put_ctx(event->ctx);
 
-	if (event->pmu)
-		module_put(event->pmu->module);
+	if (pmu) {
+		module_put(pmu->module);
+		scoped_guard (spinlock, &pmu->events_lock) {
+			list_del(&event->pmu_list);
+			wake_up_var(pmu);
+		}
+	}
 
 	call_rcu(&event->rcu_head, free_event_rcu);
 }
@@ -5605,22 +5625,6 @@ static void _free_event(struct perf_event *event)
 	__free_event(event);
 }
 
-/*
- * Used to free events which have a known refcount of 1, such as in error paths
- * where the event isn't exposed yet and inherited events.
- */
-static void free_event(struct perf_event *event)
-{
-	if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
-				"unexpected event refcount: %ld; ptr=%p\n",
-				atomic_long_read(&event->refcount), event)) {
-		/* leak to avoid use-after-free */
-		return;
-	}
-
-	_free_event(event);
-}
-
 /*
  * Remove user event from the owner task.
  */
@@ -5724,7 +5728,11 @@ int perf_event_release_kernel(struct perf_event *event)
 	 * Thus this guarantees that we will in fact observe and kill _ALL_
 	 * child events.
 	 */
-	perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD);
+	if (event->state > PERF_EVENT_STATE_REVOKED) {
+		perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD);
+	} else {
+		event->state = PERF_EVENT_STATE_DEAD;
+	}
 
 	perf_event_ctx_unlock(event, ctx);
 
@@ -6013,7 +6021,7 @@ __perf_read(struct perf_event *event, char __user *buf, size_t count)
 	 * error state (i.e. because it was pinned but it couldn't be
 	 * scheduled on to the CPU at some point).
 	 */
-	if (event->state == PERF_EVENT_STATE_ERROR)
+	if (event->state <= PERF_EVENT_STATE_ERROR)
 		return 0;
 
 	if (count < event->read_size)
@@ -6052,8 +6060,14 @@ static __poll_t perf_poll(struct file *file, poll_table *wait)
 	struct perf_buffer *rb;
 	__poll_t events = EPOLLHUP;
 
+	if (event->state <= PERF_EVENT_STATE_REVOKED)
+		return EPOLLERR;
+
 	poll_wait(file, &event->waitq, wait);
 
+	if (event->state <= PERF_EVENT_STATE_REVOKED)
+		return EPOLLERR;
+
 	if (is_event_hup(event))
 		return events;
 
@@ -6232,6 +6246,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
 	void (*func)(struct perf_event *);
 	u32 flags = arg;
 
+	if (event->state <= PERF_EVENT_STATE_REVOKED)
+		return -ENODEV;
+
 	switch (cmd) {
 	case PERF_EVENT_IOC_ENABLE:
 		func = _perf_event_enable;
@@ -6607,9 +6624,22 @@ void ring_buffer_put(struct perf_buffer *rb)
 	call_rcu(&rb->rcu_head, rb_free_rcu);
 }
 
+typedef void (*mapped_f)(struct perf_event *event, struct mm_struct *mm);
+
+#define get_mapped(event, func)			\
+({	struct pmu *pmu;			\
+	mapped_f f = NULL;			\
+	guard(rcu)();				\
+	pmu = READ_ONCE(event->pmu);		\
+	if (pmu)				\
+		f = pmu->func;			\
+	f;					\
+})
+
 static void perf_mmap_open(struct vm_area_struct *vma)
 {
 	struct perf_event *event = vma->vm_file->private_data;
+	mapped_f mapped = get_mapped(event, event_mapped);
 
 	atomic_inc(&event->mmap_count);
 	atomic_inc(&event->rb->mmap_count);
@@ -6617,8 +6647,8 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 	if (vma->vm_pgoff)
 		atomic_inc(&event->rb->aux_mmap_count);
 
-	if (event->pmu->event_mapped)
-		event->pmu->event_mapped(event, vma->vm_mm);
+	if (mapped)
+		mapped(event, vma->vm_mm);
 }
 
 static void perf_pmu_output_stop(struct perf_event *event);
@@ -6634,14 +6664,16 @@ static void perf_pmu_output_stop(struct perf_event *event);
 static void perf_mmap_close(struct vm_area_struct *vma)
 {
 	struct perf_event *event = vma->vm_file->private_data;
+	mapped_f unmapped = get_mapped(event, event_unmapped);
 	struct perf_buffer *rb = ring_buffer_get(event);
 	struct user_struct *mmap_user = rb->mmap_user;
 	int mmap_locked = rb->mmap_locked;
 	unsigned long size = perf_data_size(rb);
 	bool detach_rest = false;
 
-	if (event->pmu->event_unmapped)
-		event->pmu->event_unmapped(event, vma->vm_mm);
+	/* FIXIES vs perf_pmu_unregister() */
+	if (unmapped)
+		unmapped(event, vma->vm_mm);
 
 	/*
 	 * The AUX buffer is strictly a sub-buffer, serialize using aux_mutex
@@ -6834,6 +6866,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	unsigned long nr_pages;
 	long user_extra = 0, extra = 0;
 	int ret, flags = 0;
+	mapped_f mapped;
 
 	/*
 	 * Don't allow mmap() of inherited per-task counters. This would
@@ -6864,6 +6897,16 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	mutex_lock(&event->mmap_mutex);
 	ret = -EINVAL;
 
+	/*
+	 * This relies on __pmu_detach_event() taking mmap_mutex after marking
+	 * the event REVOKED. Either we observe the state, or __pmu_detach_event()
+	 * will detach the rb created here.
+	 */
+	if (event->state <= PERF_EVENT_STATE_REVOKED) {
+		ret = -ENODEV;
+		goto unlock;
+	}
+
 	if (vma->vm_pgoff == 0) {
 		nr_pages -= 1;
 
@@ -7042,8 +7085,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	if (!ret)
 		ret = map_range(rb, vma);
 
-	if (!ret && event->pmu->event_mapped)
-		event->pmu->event_mapped(event, vma->vm_mm);
+	mapped = get_mapped(event, event_mapped);
+	if (mapped)
+		mapped(event, vma->vm_mm);
 
 	return ret;
 }
@@ -7054,6 +7098,9 @@ static int perf_fasync(int fd, struct file *filp, int on)
 	struct perf_event *event = filp->private_data;
 	int retval;
 
+	if (event->state <= PERF_EVENT_STATE_REVOKED)
+		return -ENODEV;
+
 	inode_lock(inode);
 	retval = fasync_helper(fd, filp, on, &event->fasync);
 	inode_unlock(inode);
@@ -11062,6 +11109,9 @@ static int __perf_event_set_bpf_prog(struct perf_event *event,
 {
 	bool is_kprobe, is_uprobe, is_tracepoint, is_syscall_tp;
 
+	if (event->state <= PERF_EVENT_STATE_REVOKED)
+		return -ENODEV;
+
 	if (!perf_event_is_tracing(event))
 		return perf_event_set_bpf_handler(event, prog, bpf_cookie);
 
@@ -12245,6 +12295,9 @@ int perf_pmu_register(struct pmu *_pmu, const char *name, int type)
 	if (!pmu->event_idx)
 		pmu->event_idx = perf_event_idx_default;
 
+	INIT_LIST_HEAD(&pmu->events);
+	spin_lock_init(&pmu->events_lock);
+
 	/*
 	 * Now that the PMU is complete, make it visible to perf_try_init_event().
 	 */
@@ -12258,21 +12311,143 @@ int perf_pmu_register(struct pmu *_pmu, const char *name, int type)
 }
 EXPORT_SYMBOL_GPL(perf_pmu_register);
 
-void perf_pmu_unregister(struct pmu *pmu)
+static void __pmu_detach_event(struct pmu *pmu, struct perf_event *event,
+			       struct perf_event_context *ctx)
+{
+	/*
+	 * De-schedule the event and mark it REVOKED.
+	 */
+	perf_event_exit_event(event, ctx, true);
+
+	/*
+	 * All _free_event() bits that rely on event->pmu:
+	 *
+	 * Notably, perf_mmap() relies on the ordering here.
+	 */
+	scoped_guard (mutex, &event->mmap_mutex) {
+		WARN_ON_ONCE(pmu->event_unmapped);
+		/*
+		 * Mostly an empty lock sequence, such that perf_mmap(), which
+		 * relies on mmap_mutex, is sure to observe the state change.
+		 */
+	}
+
+	perf_event_free_bpf_prog(event);
+	perf_free_addr_filters(event);
+
+	if (event->destroy) {
+		event->destroy(event);
+		event->destroy = NULL;
+	}
+
+	if (event->pmu_ctx) {
+		put_pmu_ctx(event->pmu_ctx);
+		event->pmu_ctx = NULL;
+	}
+
+	exclusive_event_destroy(event);
+	module_put(pmu->module);
+
+	event->pmu = NULL; /* force fault instead of UAF */
+}
+
+static void pmu_detach_event(struct pmu *pmu, struct perf_event *event)
+{
+	struct perf_event_context *ctx;
+
+	ctx = perf_event_ctx_lock(event);
+	__pmu_detach_event(pmu, event, ctx);
+	perf_event_ctx_unlock(event, ctx);
+
+	scoped_guard (spinlock, &pmu->events_lock)
+		list_del(&event->pmu_list);
+}
+
+static struct perf_event *pmu_get_event(struct pmu *pmu)
+{
+	struct perf_event *event;
+
+	guard(spinlock)(&pmu->events_lock);
+	list_for_each_entry(event, &pmu->events, pmu_list) {
+		if (atomic_long_inc_not_zero(&event->refcount))
+			return event;
+	}
+
+	return NULL;
+}
+
+static bool pmu_empty(struct pmu *pmu)
+{
+	guard(spinlock)(&pmu->events_lock);
+	return list_empty(&pmu->events);
+}
+
+static void pmu_detach_events(struct pmu *pmu)
+{
+	struct perf_event *event;
+
+	for (;;) {
+		event = pmu_get_event(pmu);
+		if (!event)
+			break;
+
+		pmu_detach_event(pmu, event);
+		put_event(event);
+	}
+
+	/*
+	 * wait for pending _free_event()s
+	 */
+	wait_var_event(pmu, pmu_empty(pmu));
+}
+
+int perf_pmu_unregister(struct pmu *pmu)
 {
 	scoped_guard (mutex, &pmus_lock) {
+		if (!idr_cmpxchg(&pmu_idr, pmu->type, pmu, NULL))
+			return -EINVAL;
+
 		list_del_rcu(&pmu->entry);
-		idr_remove(&pmu_idr, pmu->type);
 	}
 
 	/*
 	 * We dereference the pmu list under both SRCU and regular RCU, so
 	 * synchronize against both of those.
+	 *
+	 * Notably, the entirety of event creation, from perf_init_event()
+	 * (which will now fail, because of the above) until
+	 * perf_install_in_context() should be under SRCU such that
+	 * this synchronizes against event creation. This avoids trying to
+	 * detach events that are not fully formed.
 	 */
 	synchronize_srcu(&pmus_srcu);
 	synchronize_rcu();
 
+	if (pmu->event_unmapped && !pmu_empty(pmu)) {
+		/*
+		 * Can't force remove events when pmu::event_unmapped()
+		 * is used in perf_mmap_close().
+		 */
+		guard(mutex)(&pmus_lock);
+		idr_cmpxchg(&pmu_idr, pmu->type, NULL, pmu);
+		list_add_rcu(&pmu->entry, &pmus);
+		return -EBUSY;
+	}
+
+	scoped_guard (mutex, &pmus_lock)
+		idr_remove(&pmu_idr, pmu->type);
+
+	/*
+	 * PMU is removed from the pmus list, so no new events will
+	 * be created, now take care of the existing ones.
+	 */
+	pmu_detach_events(pmu);
+
+	/*
+	 * PMU is unused, make it go away.
+	 */
 	perf_pmu_free(pmu);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(perf_pmu_unregister);
 
@@ -12366,7 +12541,7 @@ static struct pmu *perf_init_event(struct perf_event *event)
 	struct pmu *pmu;
 	int type, ret;
 
-	guard(srcu)(&pmus_srcu);
+	guard(srcu)(&pmus_srcu); /* pmu idr/list access */
 
 	/*
 	 * Save original type before calling pmu->event_init() since certain
@@ -12590,6 +12765,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	INIT_LIST_HEAD(&event->active_entry);
 	INIT_LIST_HEAD(&event->addr_filters.list);
 	INIT_HLIST_NODE(&event->hlist_entry);
+	INIT_LIST_HEAD(&event->pmu_list);
 
 
 	init_waitqueue_head(&event->waitq);
@@ -12768,6 +12944,13 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	/* symmetric to unaccount_event() in _free_event() */
 	account_event(event);
 
+	/*
+	 * Event creation should be under SRCU, see perf_pmu_unregister().
+	 */
+	lockdep_assert_held(&pmus_srcu);
+	scoped_guard (spinlock, &pmu->events_lock)
+		list_add(&event->pmu_list, &pmu->events);
+
 	return_ptr(event);
 }
 
@@ -12967,6 +13150,9 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 		goto unlock;
 
 	if (output_event) {
+		if (output_event->state <= PERF_EVENT_STATE_REVOKED)
+			goto unlock;
+
 		/* get the rb we want to redirect to */
 		rb = ring_buffer_get(output_event);
 		if (!rb)
@@ -13148,6 +13334,11 @@ SYSCALL_DEFINE5(perf_event_open,
 	if (event_fd < 0)
 		return event_fd;
 
+	/*
+	 * Event creation should be under SRCU, see perf_pmu_unregister().
+	 */
+	guard(srcu)(&pmus_srcu);
+
 	CLASS(fd, group)(group_fd);     // group_fd == -1 => empty
 	if (group_fd != -1) {
 		if (!is_perf_file(group)) {
@@ -13155,6 +13346,10 @@ SYSCALL_DEFINE5(perf_event_open,
 			goto err_fd;
 		}
 		group_leader = fd_file(group)->private_data;
+		if (group_leader->state <= PERF_EVENT_STATE_REVOKED) {
+			err = -ENODEV;
+			goto err_fd;
+		}
 		if (flags & PERF_FLAG_FD_OUTPUT)
 			output_event = group_leader;
 		if (flags & PERF_FLAG_FD_NO_GROUP)
@@ -13451,7 +13646,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	if (task)
 		up_read(&task->signal->exec_update_lock);
 err_alloc:
-	free_event(event);
+	put_event(event);
 err_task:
 	if (task)
 		put_task_struct(task);
@@ -13488,6 +13683,11 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	if (attr->aux_output || attr->aux_action)
 		return ERR_PTR(-EINVAL);
 
+	/*
+	 * Event creation should be under SRCU, see perf_pmu_unregister().
+	 */
+	guard(srcu)(&pmus_srcu);
+
 	event = perf_event_alloc(attr, cpu, task, NULL, NULL,
 				 overflow_handler, context, -1);
 	if (IS_ERR(event)) {
@@ -13559,7 +13759,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	perf_unpin_context(ctx);
 	put_ctx(ctx);
 err_alloc:
-	free_event(event);
+	put_event(event);
 err:
 	return ERR_PTR(err);
 }
@@ -13699,10 +13899,15 @@ static void sync_child_event(struct perf_event *child_event)
 }
 
 static void
-perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
+perf_event_exit_event(struct perf_event *event,
+		      struct perf_event_context *ctx, bool revoke)
 {
 	struct perf_event *parent_event = event->parent;
-	unsigned long detach_flags = 0;
+	unsigned long detach_flags = DETACH_EXIT;
+	bool is_child = !!parent_event;
+
+	if (parent_event == EVENT_TOMBSTONE)
+		parent_event = NULL;
 
 	if (parent_event) {
 		/*
@@ -13717,22 +13922,29 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
 		 * Do destroy all inherited groups, we don't care about those
 		 * and being thorough is better.
 		 */
-		detach_flags = DETACH_GROUP | DETACH_CHILD;
+		detach_flags |= DETACH_GROUP | DETACH_CHILD;
 		mutex_lock(&parent_event->child_mutex);
 	}
 
-	perf_remove_from_context(event, detach_flags | DETACH_EXIT);
+	if (revoke)
+		detach_flags |= DETACH_GROUP | DETACH_REVOKE;
+
+	perf_remove_from_context(event, detach_flags);
 	/*
 	 * Child events can be freed.
 	 */
-	if (parent_event) {
-		mutex_unlock(&parent_event->child_mutex);
-		/*
-		 * Kick perf_poll() for is_event_hup();
-		 */
-		perf_event_wakeup(parent_event);
-		put_event(event);
+	if (is_child) {
+		if (parent_event) {
+			mutex_unlock(&parent_event->child_mutex);
+			/*
+			 * Kick perf_poll() for is_event_hup();
+			 */
+			perf_event_wakeup(parent_event);
+			/*
+			 * pmu_detach_event() will have an extra refcount.
+			 */
+			put_event(event);
+		}
 		return;
 	}
 
@@ -13796,7 +14008,7 @@ static void perf_event_exit_task_context(struct task_struct *task, bool exit)
 		perf_event_task(task, ctx, 0);
 
 	list_for_each_entry_safe(child_event, next, &ctx->event_list, event_entry)
-		perf_event_exit_event(child_event, ctx);
+		perf_event_exit_event(child_event, ctx, false);
 
 	mutex_unlock(&ctx->mutex);
 
@@ -13949,6 +14161,14 @@ inherit_event(struct perf_event *parent_event,
 	if (parent_event->parent)
 		parent_event = parent_event->parent;
 
+	if (parent_event->state <= PERF_EVENT_STATE_REVOKED)
+		return NULL;
+
+	/*
+	 * Event creation should be under SRCU, see perf_pmu_unregister().
+	 */
+	guard(srcu)(&pmus_srcu);
+
 	child_event = perf_event_alloc(&parent_event->attr,
 					   parent_event->cpu,
 					   child,
@@ -13962,7 +14182,7 @@ inherit_event(struct perf_event *parent_event,
 
 	pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event);
 	if (IS_ERR(pmu_ctx)) {
-		free_event(child_event);
+		put_event(child_event);
 		return ERR_CAST(pmu_ctx);
 	}
 	child_event->pmu_ctx = pmu_ctx;
@@ -13977,7 +14197,7 @@ inherit_event(struct perf_event *parent_event,
 	if (is_orphaned_event(parent_event) ||
 	    !atomic_long_inc_not_zero(&parent_event->refcount)) {
 		mutex_unlock(&parent_event->child_mutex);
-		free_event(child_event);
+		put_event(child_event);
 		return NULL;
 	}
 