forked from mirrors/linux
		
	bpf: introduce bpf_perf_event_output() helper
This helper is used to send raw data from an eBPF program into a special PERF_TYPE_SOFTWARE/PERF_COUNT_SW_BPF_OUTPUT perf_event. User space needs to perf_event_open() it (either for one or all CPUs) and store the FD into a perf_event_array (similar to the bpf_perf_event_read() helper) before the eBPF program can send data into it.

Today the programs triggered by kprobes collect the data and either store it into maps or print it via bpf_trace_printk(), where the latter is a debug facility and is not suitable for streaming data. This new helper replaces such bpf_trace_printk() usage and allows programs to have a dedicated channel into user space for post-processing of the raw data collected.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									fa128e6a14
								
							
						
					
					
						commit
						a43eec3042
					
				
					 5 changed files with 62 additions and 1 deletions
				
			
		| 
						 | 
					@ -287,6 +287,17 @@ enum bpf_func_id {
 | 
				
			||||||
	 * Return: realm if != 0
 | 
						 * Return: realm if != 0
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	BPF_FUNC_get_route_realm,
 | 
						BPF_FUNC_get_route_realm,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/**
 | 
				
			||||||
 | 
						 * bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample
 | 
				
			||||||
 | 
						 * @ctx: struct pt_regs*
 | 
				
			||||||
 | 
						 * @map: pointer to perf_event_array map
 | 
				
			||||||
 | 
						 * @index: index of event in the map
 | 
				
			||||||
 | 
						 * @data: data on stack to be output as raw data
 | 
				
			||||||
 | 
						 * @size: size of data
 | 
				
			||||||
 | 
						 * Return: 0 on success
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						BPF_FUNC_perf_event_output,
 | 
				
			||||||
	__BPF_FUNC_MAX_ID,
 | 
						__BPF_FUNC_MAX_ID,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -110,6 +110,7 @@ enum perf_sw_ids {
 | 
				
			||||||
	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
 | 
						PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
 | 
				
			||||||
	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 | 
						PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 | 
				
			||||||
	PERF_COUNT_SW_DUMMY			= 9,
 | 
						PERF_COUNT_SW_DUMMY			= 9,
 | 
				
			||||||
 | 
						PERF_COUNT_SW_BPF_OUTPUT		= 10,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	PERF_COUNT_SW_MAX,			/* non-ABI */
 | 
						PERF_COUNT_SW_MAX,			/* non-ABI */
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -295,6 +295,8 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
 | 
				
			||||||
		return (void *)attr;
 | 
							return (void *)attr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (attr->type != PERF_TYPE_RAW &&
 | 
						if (attr->type != PERF_TYPE_RAW &&
 | 
				
			||||||
 | 
						    !(attr->type == PERF_TYPE_SOFTWARE &&
 | 
				
			||||||
 | 
						      attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
 | 
				
			||||||
	    attr->type != PERF_TYPE_HARDWARE) {
 | 
						    attr->type != PERF_TYPE_HARDWARE) {
 | 
				
			||||||
		perf_event_release_kernel(event);
 | 
							perf_event_release_kernel(event);
 | 
				
			||||||
		return ERR_PTR(-EINVAL);
 | 
							return ERR_PTR(-EINVAL);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -245,6 +245,7 @@ static const struct {
 | 
				
			||||||
} func_limit[] = {
 | 
					} func_limit[] = {
 | 
				
			||||||
	{BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
 | 
						{BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
 | 
				
			||||||
	{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
 | 
						{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
 | 
				
			||||||
 | 
						{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output},
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void print_verifier_state(struct verifier_env *env)
 | 
					static void print_verifier_state(struct verifier_env *env)
 | 
				
			||||||
| 
						 | 
					@ -910,7 +911,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 | 
				
			||||||
		 * don't allow any other map type to be passed into
 | 
							 * don't allow any other map type to be passed into
 | 
				
			||||||
		 * the special func;
 | 
							 * the special func;
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		if (bool_map != bool_func)
 | 
							if (bool_func && bool_map != bool_func)
 | 
				
			||||||
			return -EINVAL;
 | 
								return -EINVAL;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -215,6 +215,50 @@ const struct bpf_func_proto bpf_perf_event_read_proto = {
 | 
				
			||||||
	.arg2_type	= ARG_ANYTHING,
 | 
						.arg2_type	= ARG_ANYTHING,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct pt_regs *regs = (struct pt_regs *) (long) r1;
 | 
				
			||||||
 | 
						struct bpf_map *map = (struct bpf_map *) (long) r2;
 | 
				
			||||||
 | 
						struct bpf_array *array = container_of(map, struct bpf_array, map);
 | 
				
			||||||
 | 
						void *data = (void *) (long) r4;
 | 
				
			||||||
 | 
						struct perf_sample_data sample_data;
 | 
				
			||||||
 | 
						struct perf_event *event;
 | 
				
			||||||
 | 
						struct perf_raw_record raw = {
 | 
				
			||||||
 | 
							.size = size,
 | 
				
			||||||
 | 
							.data = data,
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (unlikely(index >= array->map.max_entries))
 | 
				
			||||||
 | 
							return -E2BIG;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						event = (struct perf_event *)array->ptrs[index];
 | 
				
			||||||
 | 
						if (unlikely(!event))
 | 
				
			||||||
 | 
							return -ENOENT;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
 | 
				
			||||||
 | 
							     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (unlikely(event->oncpu != smp_processor_id()))
 | 
				
			||||||
 | 
							return -EOPNOTSUPP;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						perf_sample_data_init(&sample_data, 0, 0);
 | 
				
			||||||
 | 
						sample_data.raw = &raw;
 | 
				
			||||||
 | 
						perf_event_output(event, &sample_data, regs);
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static const struct bpf_func_proto bpf_perf_event_output_proto = {
 | 
				
			||||||
 | 
						.func		= bpf_perf_event_output,
 | 
				
			||||||
 | 
						.gpl_only	= false,
 | 
				
			||||||
 | 
						.ret_type	= RET_INTEGER,
 | 
				
			||||||
 | 
						.arg1_type	= ARG_PTR_TO_CTX,
 | 
				
			||||||
 | 
						.arg2_type	= ARG_CONST_MAP_PTR,
 | 
				
			||||||
 | 
						.arg3_type	= ARG_ANYTHING,
 | 
				
			||||||
 | 
						.arg4_type	= ARG_PTR_TO_STACK,
 | 
				
			||||||
 | 
						.arg5_type	= ARG_CONST_STACK_SIZE,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
 | 
					static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	switch (func_id) {
 | 
						switch (func_id) {
 | 
				
			||||||
| 
						 | 
					@ -242,6 +286,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 | 
				
			||||||
		return &bpf_get_smp_processor_id_proto;
 | 
							return &bpf_get_smp_processor_id_proto;
 | 
				
			||||||
	case BPF_FUNC_perf_event_read:
 | 
						case BPF_FUNC_perf_event_read:
 | 
				
			||||||
		return &bpf_perf_event_read_proto;
 | 
							return &bpf_perf_event_read_proto;
 | 
				
			||||||
 | 
						case BPF_FUNC_perf_event_output:
 | 
				
			||||||
 | 
							return &bpf_perf_event_output_proto;
 | 
				
			||||||
	default:
 | 
						default:
 | 
				
			||||||
		return NULL;
 | 
							return NULL;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue