perf core: Per event callchain limit

In addition to controlling the system-wide maximum depth via
/proc/sys/kernel/perf_event_max_stack, we can now ask for a different
depth per event, using perf_event_attr.sample_max_stack. This uses a
u16 hole at the end of perf_event_attr. When perf_event_attr.sample_type
has PERF_SAMPLE_CALLCHAIN set, a sample_max_stack of zero means "use
perf_event_max_stack"; otherwise the value is bounds checked under
callchain_mutex.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/n/tip-kolmn1yo40p7jhswxwrc7rrd@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
commit 97c79a38cd
parent 480ca357fd
5 changed files with 23 additions and 6 deletions
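Since sample_max_stack is userspace-visible ABI, a minimal usage sketch may help. This is not part of the patch: it assumes a kernel with this change applied and the usual raw perf_event_open(2) syscall wrapper; the event type, sample period, and the depth of 16 are arbitrary example values.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Raw syscall wrapper; glibc does not provide one for perf_event_open(2). */
static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size          = sizeof(attr);
	attr.type          = PERF_TYPE_HARDWARE;
	attr.config        = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_CALLCHAIN;
	/* Per-event callchain depth; 0 would mean "use perf_event_max_stack". */
	attr.sample_max_stack = 16;

	fd = perf_event_open(&attr, 0, -1, -1, 0);	/* current task, any CPU */
	if (fd < 0) {
		/* EOVERFLOW: sample_max_stack > /proc/sys/kernel/perf_event_max_stack */
		perror("perf_event_open");
		return 1;
	}
	close(fd);
	return 0;
}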
				
			
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
@@ -1076,7 +1076,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct
 extern struct perf_callchain_entry *
 get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 		   u32 max_stack, bool crosstask, bool add_mark);
-extern int get_callchain_buffers(void);
+extern int get_callchain_buffers(int max_stack);
 extern void put_callchain_buffers(void);
 
 extern int sysctl_perf_event_max_stack;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
@@ -276,6 +276,9 @@ enum perf_event_read_format {
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
+ *
+ * @sample_max_stack: Max number of frame pointers in a callchain,
+ *		      should be < /proc/sys/kernel/perf_event_max_stack
  */
 struct perf_event_attr {
 
@@ -385,7 +388,8 @@ struct perf_event_attr {
 	 * Wakeup watermark for AUX area
 	 */
 	__u32	aux_watermark;
-	__u32	__reserved_2;	/* align to __u64 */
+	__u16	sample_max_stack;
+	__u16	__reserved_2;	/* align to __u64 */
 };
 
 #define perf_flags(attr)	(*(&(attr)->read_format + 1))
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
@@ -99,7 +99,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	if (err)
 		goto free_smap;
 
-	err = get_callchain_buffers();
+	err = get_callchain_buffers(sysctl_perf_event_max_stack);
 	if (err)
 		goto free_smap;
 
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
@@ -104,7 +104,7 @@ static int alloc_callchain_buffers(void)
 	return -ENOMEM;
 }
 
-int get_callchain_buffers(void)
+int get_callchain_buffers(int event_max_stack)
 {
 	int err = 0;
 	int count;
@@ -121,6 +121,15 @@ int get_callchain_buffers(void)
 		/* If the allocation failed, give up */
 		if (!callchain_cpus_entries)
 			err = -ENOMEM;
+		/*
+		 * If requesting per event more than the global cap,
+		 * return a different error to help userspace figure
+		 * this out.
+		 *
+		 * And also do it here so that we have &callchain_mutex held.
+		 */
+		if (event_max_stack > sysctl_perf_event_max_stack)
+			err = -EOVERFLOW;
 		goto exit;
 	}
 
@@ -174,11 +183,12 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
 	bool user   = !event->attr.exclude_callchain_user;
 	/* Disallow cross-task user callchains. */
 	bool crosstask = event->ctx->task && event->ctx->task != current;
+	const u32 max_stack = event->attr.sample_max_stack;
 
 	if (!kernel && !user)
 		return NULL;
 
-	return get_perf_callchain(regs, 0, kernel, user, sysctl_perf_event_max_stack, crosstask, true);
+	return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true);
 }
 
 struct perf_callchain_entry *
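The -EOVERFLOW case above is taken when a per-event request exceeds the global cap. A hypothetical userspace helper (not part of this patch; the function name is made up for illustration) could read the sysctl first and clamp sample_max_stack before calling perf_event_open(2):

#include <stdio.h>

/* Returns the current /proc/sys/kernel/perf_event_max_stack value, or -1. */
static int read_perf_event_max_stack(void)
{
	FILE *f = fopen("/proc/sys/kernel/perf_event_max_stack", "r");
	int max = -1;

	if (!f)
		return -1;
	if (fscanf(f, "%d", &max) != 1)
		max = -1;
	fclose(f);
	return max;
}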
diff --git a/kernel/events/core.c b/kernel/events/core.c
@@ -8843,7 +8843,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
 	if (!event->parent) {
 		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
-			err = get_callchain_buffers();
+			err = get_callchain_buffers(attr->sample_max_stack);
 			if (err)
 				goto err_addr_filters;
 		}
@@ -9165,6 +9165,9 @@ SYSCALL_DEFINE5(perf_event_open,
 			return -EINVAL;
 	}
 
+	if (!attr.sample_max_stack)
+		attr.sample_max_stack = sysctl_perf_event_max_stack;
+
 	/*
 	 * In cgroup mode, the pid argument is used to pass the fd
 	 * opened to the cgroup directory in cgroupfs. The cpu argument