	bpf: permit multiple bpf attachments for a single perf event
This patch enables multiple bpf attachments for a single kprobe/uprobe/tracepoint trace event. Each trace_event keeps a list of attached perf events, and when the event fires, all attached bpf programs are executed in the order in which they were attached.

A global bpf_event_mutex lock is introduced to protect prog_array attaching and detaching. The alternative would be to put a mutex lock in every trace_event_call structure, but that costs a lot of extra memory, so a single global bpf_event_mutex is a good compromise.

Detaching a bpf program involves a memory allocation. If that allocation fails, a dummy do-nothing program replaces the to-be-detached program in place.

Signed-off-by: Yonghong Song <yhs@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 0b4c6841fe
commit e87c6bc385

9 changed files with 255 additions and 56 deletions
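For illustration only (not part of the patch): with prog_array in place, user space can attach more than one BPF program to the same kprobe/uprobe/tracepoint by opening one perf event per program on that trace event and issuing PERF_EVENT_IOC_SET_BPF on each. A minimal sketch, with hypothetical helper names and no error handling:

#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <linux/types.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

/* open a perf event on a tracepoint; tp_id would come from tracefs
 * (.../events/<subsys>/<name>/id) */
static int open_tp_event(__u64 tp_id)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_TRACEPOINT,
		.size	= sizeof(attr),
		.config	= tp_id,
	};

	return syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
		       -1 /* group_fd */, 0 /* flags */);
}

static void attach_two_progs(__u64 tp_id, int prog_fd1, int prog_fd2)
{
	int ev1 = open_tp_event(tp_id);
	int ev2 = open_tp_event(tp_id);

	/* each perf event carries its own BPF program; both now run,
	 * in the order of attachment */
	ioctl(ev1, PERF_EVENT_IOC_SET_BPF, prog_fd1);
	ioctl(ev2, PERF_EVENT_IOC_SET_BPF, prog_fd2);
	ioctl(ev1, PERF_EVENT_IOC_ENABLE, 0);
	ioctl(ev2, PERF_EVENT_IOC_ENABLE, 0);
}

Before this change, the second PERF_EVENT_IOC_SET_BPF on the same trace event failed with -EEXIST (see the check removed from perf_event_set_bpf_prog() below).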
@@ -273,18 +273,38 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs);
 int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
 				__u32 __user *prog_ids, u32 cnt);
 
-#define BPF_PROG_RUN_ARRAY(array, ctx, func)		\
+void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+				struct bpf_prog *old_prog);
+int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
+			struct bpf_prog *exclude_prog,
+			struct bpf_prog *include_prog,
+			struct bpf_prog_array **new_array);
+
+#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null)	\
 	({						\
-		struct bpf_prog **_prog;		\
+		struct bpf_prog **_prog, *__prog;	\
+		struct bpf_prog_array *_array;		\
 		u32 _ret = 1;				\
 		rcu_read_lock();			\
-		_prog = rcu_dereference(array)->progs;	\
-		for (; *_prog; _prog++)			\
-			_ret &= func(*_prog, ctx);	\
+		_array = rcu_dereference(array);	\
+		if (unlikely(check_non_null && !_array))\
+			goto _out;			\
+		_prog = _array->progs;			\
+		while ((__prog = READ_ONCE(*_prog))) {	\
+			_ret &= func(__prog, ctx);	\
+			_prog++;			\
+		}					\
+_out:							\
 		rcu_read_unlock();			\
 		_ret;					\
 	 })
 
+#define BPF_PROG_RUN_ARRAY(array, ctx, func)		\
+	__BPF_PROG_RUN_ARRAY(array, ctx, func, false)
+
+#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func)	\
+	__BPF_PROG_RUN_ARRAY(array, ctx, func, true)
+
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
 
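The rewritten run-array macro walks a NULL-terminated array of program pointers under RCU and ANDs the return values together, so any one program returning 0 suppresses the event, while the dummy program (which returns 1) is a no-op. A stand-alone user-space model of just that return-value convention, using plain function pointers instead of struct bpf_prog (illustrative names, not kernel code):

#include <stdio.h>

typedef unsigned int (*handler_t)(void *ctx);

static unsigned int always_store(void *ctx) { return 1; }	/* like dummy_bpf_prog */
static unsigned int drop_event(void *ctx)   { return 0; }

/* Walk a NULL-terminated handler array and AND the results together,
 * mirroring __BPF_PROG_RUN_ARRAY (minus RCU and READ_ONCE). */
static unsigned int run_array(handler_t *array, void *ctx)
{
	unsigned int ret = 1;

	for (; *array; array++)
		ret &= (*array)(ctx);
	return ret;
}

int main(void)
{
	handler_t keep[]  = { always_store, always_store, NULL };
	handler_t mixed[] = { always_store, drop_event, NULL };

	/* prints "1 0": one handler returning 0 is enough to drop the event */
	printf("%u %u\n", run_array(keep, NULL), run_array(mixed, NULL));
	return 0;
}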
@@ -271,14 +271,37 @@ struct trace_event_call {
 #ifdef CONFIG_PERF_EVENTS
 	int				perf_refcount;
 	struct hlist_head __percpu	*perf_events;
-	struct bpf_prog			*prog;
-	struct perf_event		*bpf_prog_owner;
+	struct bpf_prog_array __rcu	*prog_array;
 
 	int	(*perf_perm)(struct trace_event_call *,
 			     struct perf_event *);
 #endif
 };
 
+#ifdef CONFIG_PERF_EVENTS
+static inline bool bpf_prog_array_valid(struct trace_event_call *call)
+{
+	/*
+	 * This inline function checks whether call->prog_array
+	 * is valid or not. The function is called in various places,
+	 * outside rcu_read_lock/unlock, as a heuristic to speed up execution.
+	 *
+	 * If this function returns true, and later call->prog_array
+	 * becomes false inside rcu_read_lock/unlock region,
+	 * we bail out then. If this function return false,
+	 * there is a risk that we might miss a few events if the checking
+	 * were delayed until inside rcu_read_lock/unlock region and
+	 * call->prog_array happened to become non-NULL then.
+	 *
+	 * Here, READ_ONCE() is used instead of rcu_access_pointer().
+	 * rcu_access_pointer() requires the actual definition of
+	 * "struct bpf_prog_array" while READ_ONCE() only needs
+	 * a declaration of the same type.
+	 */
+	return !!READ_ONCE(call->prog_array);
+}
+#endif
+
 static inline const char *
 trace_event_name(struct trace_event_call *call)
 {
@@ -435,12 +458,23 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
 }
 
 #ifdef CONFIG_BPF_EVENTS
-unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
+unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
+int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
+void perf_event_detach_bpf_prog(struct perf_event *event);
 #else
-static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
+static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 {
 	return 1;
 }
+
+static inline int
+perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void perf_event_detach_bpf_prog(struct perf_event *event) { }
+
 #endif
 
 enum {
@@ -511,6 +545,7 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
 {
 	perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event);
 }
+
 #endif
 
 #endif /* _LINUX_TRACE_EVENT_H */
@@ -34,7 +34,6 @@ perf_trace_##call(void *__data, proto)					\
 	struct trace_event_call *event_call = __data;			\
 	struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
 	struct trace_event_raw_##call *entry;				\
-	struct bpf_prog *prog = event_call->prog;			\
 	struct pt_regs *__regs;						\
 	u64 __count = 1;						\
 	struct task_struct *__task = NULL;				\
@@ -46,7 +45,8 @@ perf_trace_##call(void *__data, proto)					\
 	__data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
 									\
 	head = this_cpu_ptr(event_call->perf_events);			\
-	if (!prog && __builtin_constant_p(!__task) && !__task &&	\
+	if (!bpf_prog_array_valid(event_call) &&			\
+	    __builtin_constant_p(!__task) && !__task &&			\
 	    hlist_empty(head))						\
 		return;							\
 									\
@@ -1394,6 +1394,20 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
+static unsigned int __bpf_prog_ret1(const void *ctx,
+				    const struct bpf_insn *insn)
+{
+	return 1;
+}
+
+static struct bpf_prog_dummy {
+	struct bpf_prog prog;
+} dummy_bpf_prog = {
+	.prog = {
+		.bpf_func = __bpf_prog_ret1,
+	},
+};
+
 /* to avoid allocating empty bpf_prog_array for cgroups that
  * don't have bpf program attached use one global 'empty_prog_array'
  * It will not be modified the caller of bpf_prog_array_alloc()
@@ -1463,6 +1477,73 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
 	return 0;
 }
 
+void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+				struct bpf_prog *old_prog)
+{
+	struct bpf_prog **prog = progs->progs;
+
+	for (; *prog; prog++)
+		if (*prog == old_prog) {
+			WRITE_ONCE(*prog, &dummy_bpf_prog.prog);
+			break;
+		}
+}
+
+int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
+			struct bpf_prog *exclude_prog,
+			struct bpf_prog *include_prog,
+			struct bpf_prog_array **new_array)
+{
+	int new_prog_cnt, carry_prog_cnt = 0;
+	struct bpf_prog **existing_prog;
+	struct bpf_prog_array *array;
+	int new_prog_idx = 0;
+
+	/* Figure out how many existing progs we need to carry over to
+	 * the new array.
+	 */
+	if (old_array) {
+		existing_prog = old_array->progs;
+		for (; *existing_prog; existing_prog++) {
+			if (*existing_prog != exclude_prog &&
+			    *existing_prog != &dummy_bpf_prog.prog)
+				carry_prog_cnt++;
+			if (*existing_prog == include_prog)
+				return -EEXIST;
+		}
+	}
+
+	/* How many progs (not NULL) will be in the new array? */
+	new_prog_cnt = carry_prog_cnt;
+	if (include_prog)
+		new_prog_cnt += 1;
+
+	/* Do we have any prog (not NULL) in the new array? */
+	if (!new_prog_cnt) {
+		*new_array = NULL;
+		return 0;
+	}
+
+	/* +1 as the end of prog_array is marked with NULL */
+	array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
+	if (!array)
+		return -ENOMEM;
+
+	/* Fill in the new prog array */
+	if (carry_prog_cnt) {
+		existing_prog = old_array->progs;
+		for (; *existing_prog; existing_prog++)
+			if (*existing_prog != exclude_prog &&
+			    *existing_prog != &dummy_bpf_prog.prog)
+				array->progs[new_prog_idx++] = *existing_prog;
+	}
+	if (include_prog)
+		array->progs[new_prog_idx++] = include_prog;
+	array->progs[new_prog_idx] = NULL;
+	*new_array = array;
+	return 0;
+}
+
 static void bpf_prog_free_deferred(struct work_struct *work)
 {
 	struct bpf_prog_aux *aux;
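bpf_prog_array_copy() builds a fresh NULL-terminated array from the old one: entries equal to exclude_prog (and any dummy entries) are dropped, include_prog is appended at the end, and -EEXIST is returned if include_prog is already present. A rough user-space model of those semantics on plain pointer arrays (no RCU, no kernel allocation; illustrative only):

#include <stdio.h>

/*
 * copy_array(dst, old, exclude, include):
 *   - copies the NULL-terminated 'old' into 'dst', skipping 'exclude'
 *   - appends 'include' at the end, if any
 *   - fails if 'include' is already attached (the patch returns -EEXIST)
 * Given old = {A, B, C}: exclude B -> {A, C}; include D -> {A, B, C, D}.
 */
static int copy_array(void **dst, void **old, void *exclude, void *include)
{
	int n = 0;

	for (; old && *old; old++) {
		if (*old == include)
			return -1;
		if (*old != exclude)
			dst[n++] = *old;
	}
	if (include)
		dst[n++] = include;
	dst[n] = NULL;		/* array stays NULL-terminated */
	return n;
}

int main(void)
{
	int a, b, c, d;					/* stand-ins for BPF programs */
	void *old[] = { &a, &b, &c, NULL };
	void *new_arr[8];

	printf("%d\n", copy_array(new_arr, old, &b, NULL));	/* 2: detach b */
	printf("%d\n", copy_array(new_arr, old, NULL, &d));	/* 4: attach d */
	printf("%d\n", copy_array(new_arr, old, NULL, &b));	/* -1: b already attached */
	return 0;
}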
@@ -7954,11 +7954,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
 			       struct pt_regs *regs, struct hlist_head *head,
 			       struct task_struct *task)
 {
-	struct bpf_prog *prog = call->prog;
-
-	if (prog) {
+	if (bpf_prog_array_valid(call)) {
 		*(struct pt_regs **)raw_data = regs;
-		if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) {
+		if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) {
 			perf_swevent_put_recursion_context(rctx);
 			return;
 		}
@@ -8147,13 +8145,11 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 {
 	bool is_kprobe, is_tracepoint, is_syscall_tp;
 	struct bpf_prog *prog;
+	int ret;
 
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
 		return perf_event_set_bpf_handler(event, prog_fd);
 
-	if (event->tp_event->prog)
-		return -EEXIST;
-
 	is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
 	is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
 	is_syscall_tp = is_syscall_trace_event(event->tp_event);
@@ -8181,26 +8177,20 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 			return -EACCES;
 		}
 	}
-	event->tp_event->prog = prog;
-	event->tp_event->bpf_prog_owner = event;
 
-	return 0;
+	ret = perf_event_attach_bpf_prog(event, prog);
+	if (ret)
+		bpf_prog_put(prog);
+	return ret;
 }
 
 static void perf_event_free_bpf_prog(struct perf_event *event)
 {
-	struct bpf_prog *prog;
-
 	if (event->attr.type != PERF_TYPE_TRACEPOINT) {
 		perf_event_free_bpf_handler(event);
 		return;
 	}
-
-	prog = event->tp_event->prog;
-	if (prog && event->tp_event->bpf_prog_owner == event) {
-		event->tp_event->prog = NULL;
-		bpf_prog_put(prog);
-	}
+	perf_event_detach_bpf_prog(event);
 }
 
 #else
@@ -17,7 +17,7 @@
 
 /**
  * trace_call_bpf - invoke BPF program
- * @prog: BPF program
+ * @call: tracepoint event
  * @ctx: opaque context pointer
  *
  * kprobe handlers execute BPF programs via this helper.
@@ -29,7 +29,7 @@
  * 1 - store kprobe event into ring buffer
  * Other values are reserved and currently alias to 1
  */
-unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
+unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 {
 	unsigned int ret;
 
@@ -49,9 +49,22 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
 		goto out;
 	}
 
-	rcu_read_lock();
-	ret = BPF_PROG_RUN(prog, ctx);
-	rcu_read_unlock();
+	/*
+	 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
+	 * to all call sites, we did a bpf_prog_array_valid() there to check
+	 * whether call->prog_array is empty or not, which is
+	 * a heurisitc to speed up execution.
+	 *
+	 * If bpf_prog_array_valid() fetched prog_array was
+	 * non-NULL, we go into trace_call_bpf() and do the actual
+	 * proper rcu_dereference() under RCU lock.
+	 * If it turns out that prog_array is NULL then, we bail out.
+	 * For the opposite, if the bpf_prog_array_valid() fetched pointer
+	 * was NULL, you'll skip the prog_array with the risk of missing
+	 * out of events when it was updated in between this and the
+	 * rcu_dereference() which is accepted risk.
+	 */
+	ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);
 
  out:
 	__this_cpu_dec(bpf_prog_active);
@@ -741,3 +754,62 @@ const struct bpf_verifier_ops perf_event_verifier_ops = {
 
 const struct bpf_prog_ops perf_event_prog_ops = {
 };
+
+static DEFINE_MUTEX(bpf_event_mutex);
+
+int perf_event_attach_bpf_prog(struct perf_event *event,
+			       struct bpf_prog *prog)
+{
+	struct bpf_prog_array __rcu *old_array;
+	struct bpf_prog_array *new_array;
+	int ret = -EEXIST;
+
+	mutex_lock(&bpf_event_mutex);
+
+	if (event->prog)
+		goto out;
+
+	old_array = rcu_dereference_protected(event->tp_event->prog_array,
+					      lockdep_is_held(&bpf_event_mutex));
+	ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
+	if (ret < 0)
+		goto out;
+
+	/* set the new array to event->tp_event and set event->prog */
+	event->prog = prog;
+	rcu_assign_pointer(event->tp_event->prog_array, new_array);
+	bpf_prog_array_free(old_array);
+
+out:
+	mutex_unlock(&bpf_event_mutex);
+	return ret;
+}
+
+void perf_event_detach_bpf_prog(struct perf_event *event)
+{
+	struct bpf_prog_array __rcu *old_array;
+	struct bpf_prog_array *new_array;
+	int ret;
+
+	mutex_lock(&bpf_event_mutex);
+
+	if (!event->prog)
+		goto out;
+
+	old_array = rcu_dereference_protected(event->tp_event->prog_array,
+					      lockdep_is_held(&bpf_event_mutex));
+
+	ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
+	if (ret < 0) {
+		bpf_prog_array_delete_safe(old_array, event->prog);
+	} else {
+		rcu_assign_pointer(event->tp_event->prog_array, new_array);
+		bpf_prog_array_free(old_array);
+	}
+
+	bpf_prog_put(event->prog);
+	event->prog = NULL;
+
+out:
+	mutex_unlock(&bpf_event_mutex);
+}
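Detaching normally swaps in a freshly copied array, but that copy needs memory; if the allocation fails, bpf_prog_array_delete_safe() falls back to patching the old array in place, overwriting the detached entry with dummy_bpf_prog (whose bpf_func just returns 1), so concurrent readers always see a well-formed NULL-terminated array. A sketch of that fallback on the same user-space model used above (illustrative names, not kernel code):

typedef unsigned int (*handler_t)(void *ctx);

static unsigned int dummy_ret1(void *ctx)
{
	return 1;			/* mirrors __bpf_prog_ret1() */
}

/* Replace 'old' in place with a do-nothing handler; the array stays
 * NULL-terminated throughout, so readers racing with the update never
 * see a hole before the terminator. */
static void delete_safe(handler_t *array, handler_t old)
{
	for (; *array; array++)
		if (*array == old) {
			*array = dummy_ret1;	/* the kernel uses WRITE_ONCE() here */
			break;
		}
}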
@@ -1174,13 +1174,12 @@ static void
 kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
 {
 	struct trace_event_call *call = &tk->tp.call;
-	struct bpf_prog *prog = call->prog;
 	struct kprobe_trace_entry_head *entry;
 	struct hlist_head *head;
 	int size, __size, dsize;
 	int rctx;
 
-	if (prog && !trace_call_bpf(prog, regs))
+	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
 		return;
 
 	head = this_cpu_ptr(call->perf_events);
@@ -1210,13 +1209,12 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 		    struct pt_regs *regs)
 {
 	struct trace_event_call *call = &tk->tp.call;
-	struct bpf_prog *prog = call->prog;
 	struct kretprobe_trace_entry_head *entry;
 	struct hlist_head *head;
 	int size, __size, dsize;
 	int rctx;
 
-	if (prog && !trace_call_bpf(prog, regs))
+	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
 		return;
 
 	head = this_cpu_ptr(call->perf_events);
@@ -559,9 +559,10 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
 static int sys_perf_refcount_enter;
 static int sys_perf_refcount_exit;
 
-static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs,
+static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *regs,
 			       struct syscall_metadata *sys_data,
-			      struct syscall_trace_enter *rec) {
+			       struct syscall_trace_enter *rec)
+{
 	struct syscall_tp_t {
 		unsigned long long regs;
 		unsigned long syscall_nr;
@@ -573,7 +574,7 @@ static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs,
 	param.syscall_nr = rec->nr;
 	for (i = 0; i < sys_data->nb_args; i++)
 		param.args[i] = rec->args[i];
-	return trace_call_bpf(prog, &param);
+	return trace_call_bpf(call, &param);
 }
 
 static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
@@ -581,7 +582,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 	struct syscall_metadata *sys_data;
 	struct syscall_trace_enter *rec;
 	struct hlist_head *head;
-	struct bpf_prog *prog;
+	bool valid_prog_array;
 	int syscall_nr;
 	int rctx;
 	int size;
@@ -596,9 +597,9 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 	if (!sys_data)
 		return;
 
-	prog = READ_ONCE(sys_data->enter_event->prog);
 	head = this_cpu_ptr(sys_data->enter_event->perf_events);
-	if (!prog && hlist_empty(head))
+	valid_prog_array = bpf_prog_array_valid(sys_data->enter_event);
+	if (!valid_prog_array && hlist_empty(head))
 		return;
 
 	/* get the size after alignment with the u32 buffer size field */
@@ -614,7 +615,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
 			       (unsigned long *)&rec->args);
 
-	if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) ||
+	if ((valid_prog_array &&
+	     !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) ||
 	    hlist_empty(head)) {
 		perf_swevent_put_recursion_context(rctx);
 		return;
@@ -659,8 +661,9 @@ static void perf_sysenter_disable(struct trace_event_call *call)
 	mutex_unlock(&syscall_trace_lock);
 }
 
-static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs,
-			      struct syscall_trace_exit *rec) {
+static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs,
+			      struct syscall_trace_exit *rec)
+{
 	struct syscall_tp_t {
 		unsigned long long regs;
 		unsigned long syscall_nr;
@@ -670,7 +673,7 @@ static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs,
 	*(struct pt_regs **)&param = regs;
 	param.syscall_nr = rec->nr;
 	param.ret = rec->ret;
-	return trace_call_bpf(prog, &param);
+	return trace_call_bpf(call, &param);
 }
 
 static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
@@ -678,7 +681,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 	struct syscall_metadata *sys_data;
 	struct syscall_trace_exit *rec;
 	struct hlist_head *head;
-	struct bpf_prog *prog;
+	bool valid_prog_array;
 	int syscall_nr;
 	int rctx;
 	int size;
@@ -693,9 +696,9 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 	if (!sys_data)
 		return;
 
-	prog = READ_ONCE(sys_data->exit_event->prog);
 	head = this_cpu_ptr(sys_data->exit_event->perf_events);
-	if (!prog && hlist_empty(head))
+	valid_prog_array = bpf_prog_array_valid(sys_data->exit_event);
+	if (!valid_prog_array && hlist_empty(head))
 		return;
 
 	/* We can probably do that at build time */
@@ -709,7 +712,8 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 	rec->nr = syscall_nr;
 	rec->ret = syscall_get_return_value(current, regs);
 
-	if ((prog && !perf_call_bpf_exit(prog, regs, rec)) ||
+	if ((valid_prog_array &&
+	     !perf_call_bpf_exit(sys_data->exit_event, regs, rec)) ||
 	    hlist_empty(head)) {
 		perf_swevent_put_recursion_context(rctx);
 		return;
@@ -1113,13 +1113,12 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
 {
 	struct trace_event_call *call = &tu->tp.call;
 	struct uprobe_trace_entry_head *entry;
-	struct bpf_prog *prog = call->prog;
 	struct hlist_head *head;
 	void *data;
 	int size, esize;
 	int rctx;
 
-	if (prog && !trace_call_bpf(prog, regs))
+	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
 		return;
 
 	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));