mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	perf: Disallow mis-matched inherited group reads
Because group consistency is non-atomic between parent (filedesc) and children (inherited) events, it is possible for PERF_FORMAT_GROUP read() to try and sum non-matching counter groups -- with non-sensical results. Add group_generation to distinguish the case where a parent group removes and adds an event and thus has the same number, but a different configuration of events as inherited groups. This became a problem when commitfa8c269353("perf/core: Invert perf_read_group() loops") flipped the order of child_list and sibling_list. Previously it would iterate the group (sibling_list) first, and for each sibling traverse the child_list. In this order, only the group composition of the parent is relevant. By flipping the order the group composition of the child (inherited) events becomes an issue and the mis-match in group composition becomes evident. That said; even prior to this commit, while reading of a group that is not equally inherited was not broken, it still made no sense. (Ab)use ECHILD as error return to indicate issues with child process group composition. Fixes:fa8c269353("perf/core: Invert perf_read_group() loops") Reported-by: Budimir Markovic <markovicbudimir@gmail.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lkml.kernel.org/r/20231018115654.GK33217@noisy.programming.kicks-ass.net
This commit is contained in:
		
							parent
							
								
									58720809f5
								
							
						
					
					
						commit
						32671e3799
					
				
					 2 changed files with 34 additions and 6 deletions
				
			
		| 
						 | 
				
			
			@ -704,6 +704,7 @@ struct perf_event {
 | 
			
		|||
	/* The cumulative AND of all event_caps for events in this group. */
 | 
			
		||||
	int				group_caps;
 | 
			
		||||
 | 
			
		||||
	unsigned int			group_generation;
 | 
			
		||||
	struct perf_event		*group_leader;
 | 
			
		||||
	/*
 | 
			
		||||
	 * event->pmu will always point to pmu in which this event belongs.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1954,6 +1954,7 @@ static void perf_group_attach(struct perf_event *event)
 | 
			
		|||
 | 
			
		||||
	list_add_tail(&event->sibling_list, &group_leader->sibling_list);
 | 
			
		||||
	group_leader->nr_siblings++;
 | 
			
		||||
	group_leader->group_generation++;
 | 
			
		||||
 | 
			
		||||
	perf_event__header_size(group_leader);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -2144,6 +2145,7 @@ static void perf_group_detach(struct perf_event *event)
 | 
			
		|||
	if (leader != event) {
 | 
			
		||||
		list_del_init(&event->sibling_list);
 | 
			
		||||
		event->group_leader->nr_siblings--;
 | 
			
		||||
		event->group_leader->group_generation++;
 | 
			
		||||
		goto out;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -5440,7 +5442,7 @@ static int __perf_read_group_add(struct perf_event *leader,
 | 
			
		|||
					u64 read_format, u64 *values)
 | 
			
		||||
{
 | 
			
		||||
	struct perf_event_context *ctx = leader->ctx;
 | 
			
		||||
	struct perf_event *sub;
 | 
			
		||||
	struct perf_event *sub, *parent;
 | 
			
		||||
	unsigned long flags;
 | 
			
		||||
	int n = 1; /* skip @nr */
 | 
			
		||||
	int ret;
 | 
			
		||||
| 
						 | 
				
			
			@ -5450,6 +5452,33 @@ static int __perf_read_group_add(struct perf_event *leader,
 | 
			
		|||
		return ret;
 | 
			
		||||
 | 
			
		||||
	raw_spin_lock_irqsave(&ctx->lock, flags);
 | 
			
		||||
	/*
 | 
			
		||||
	 * Verify the grouping between the parent and child (inherited)
 | 
			
		||||
	 * events is still in tact.
 | 
			
		||||
	 *
 | 
			
		||||
	 * Specifically:
 | 
			
		||||
	 *  - leader->ctx->lock pins leader->sibling_list
 | 
			
		||||
	 *  - parent->child_mutex pins parent->child_list
 | 
			
		||||
	 *  - parent->ctx->mutex pins parent->sibling_list
 | 
			
		||||
	 *
 | 
			
		||||
	 * Because parent->ctx != leader->ctx (and child_list nests inside
 | 
			
		||||
	 * ctx->mutex), group destruction is not atomic between children, also
 | 
			
		||||
	 * see perf_event_release_kernel(). Additionally, parent can grow the
 | 
			
		||||
	 * group.
 | 
			
		||||
	 *
 | 
			
		||||
	 * Therefore it is possible to have parent and child groups in a
 | 
			
		||||
	 * different configuration and summing over such a beast makes no sense
 | 
			
		||||
	 * what so ever.
 | 
			
		||||
	 *
 | 
			
		||||
	 * Reject this.
 | 
			
		||||
	 */
 | 
			
		||||
	parent = leader->parent;
 | 
			
		||||
	if (parent &&
 | 
			
		||||
	    (parent->group_generation != leader->group_generation ||
 | 
			
		||||
	     parent->nr_siblings != leader->nr_siblings)) {
 | 
			
		||||
		ret = -ECHILD;
 | 
			
		||||
		goto unlock;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Since we co-schedule groups, {enabled,running} times of siblings
 | 
			
		||||
| 
						 | 
				
			
			@ -5483,8 +5512,9 @@ static int __perf_read_group_add(struct perf_event *leader,
 | 
			
		|||
			values[n++] = atomic64_read(&sub->lost_samples);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
unlock:
 | 
			
		||||
	raw_spin_unlock_irqrestore(&ctx->lock, flags);
 | 
			
		||||
	return 0;
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int perf_read_group(struct perf_event *event,
 | 
			
		||||
| 
						 | 
				
			
			@ -5503,10 +5533,6 @@ static int perf_read_group(struct perf_event *event,
 | 
			
		|||
 | 
			
		||||
	values[0] = 1 + leader->nr_siblings;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * By locking the child_mutex of the leader we effectively
 | 
			
		||||
	 * lock the child list of all siblings.. XXX explain how.
 | 
			
		||||
	 */
 | 
			
		||||
	mutex_lock(&leader->child_mutex);
 | 
			
		||||
 | 
			
		||||
	ret = __perf_read_group_add(leader, read_format, values);
 | 
			
		||||
| 
						 | 
				
			
			@ -13346,6 +13372,7 @@ static int inherit_group(struct perf_event *parent_event,
 | 
			
		|||
		    !perf_get_aux_event(child_ctr, leader))
 | 
			
		||||
			return -EINVAL;
 | 
			
		||||
	}
 | 
			
		||||
	leader->group_generation = parent_event->group_generation;
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue