mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	bpf: add writable context for raw tracepoints
This is an opt-in interface that allows a tracepoint to provide a safe buffer that can be written from a BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE program. The size of the buffer must be a compile-time constant, and is checked before allowing a BPF program to attach to a tracepoint that uses this feature.

The pointer to this buffer will be the first argument of tracepoints that opt in. The pointer is valid and can be read with bpf_probe_read() by both BPF_PROG_TYPE_RAW_TRACEPOINT and BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE programs that attach to such a tracepoint, but the buffer to which it points may only be written by the latter.

Signed-off-by: Matt Mullins <mmullins@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
		
							parent
							
								
									34b8ab091f
								
							
						
					
					
						commit
						9df1c28bb7
					
				
					 8 changed files with 91 additions and 4 deletions
				
			
		| 
						 | 
					@ -272,6 +272,7 @@ enum bpf_reg_type {
 | 
				
			||||||
	PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
 | 
						PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
 | 
				
			||||||
	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
 | 
						PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
 | 
				
			||||||
	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
 | 
						PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
 | 
				
			||||||
 | 
						PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* The information passed from prog-specific *_is_valid_access
 | 
					/* The information passed from prog-specific *_is_valid_access
 | 
				
			||||||
| 
						 | 
					@ -361,6 +362,7 @@ struct bpf_prog_aux {
 | 
				
			||||||
	u32 used_map_cnt;
 | 
						u32 used_map_cnt;
 | 
				
			||||||
	u32 max_ctx_offset;
 | 
						u32 max_ctx_offset;
 | 
				
			||||||
	u32 max_pkt_offset;
 | 
						u32 max_pkt_offset;
 | 
				
			||||||
 | 
						u32 max_tp_access;
 | 
				
			||||||
	u32 stack_depth;
 | 
						u32 stack_depth;
 | 
				
			||||||
	u32 id;
 | 
						u32 id;
 | 
				
			||||||
	u32 func_cnt; /* used by non-func prog as the number of func progs */
 | 
						u32 func_cnt; /* used by non-func prog as the number of func progs */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
 | 
				
			||||||
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
 | 
					BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
 | 
				
			||||||
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
 | 
					BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
 | 
				
			||||||
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
 | 
					BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
 | 
				
			||||||
 | 
					BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#ifdef CONFIG_CGROUP_BPF
 | 
					#ifdef CONFIG_CGROUP_BPF
 | 
				
			||||||
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
 | 
					BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -45,6 +45,7 @@ struct bpf_raw_event_map {
 | 
				
			||||||
	struct tracepoint	*tp;
 | 
						struct tracepoint	*tp;
 | 
				
			||||||
	void			*bpf_func;
 | 
						void			*bpf_func;
 | 
				
			||||||
	u32			num_args;
 | 
						u32			num_args;
 | 
				
			||||||
 | 
						u32			writable_size;
 | 
				
			||||||
} __aligned(32);
 | 
					} __aligned(32);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto)					\
 | 
				
			||||||
 * to make sure that if the tracepoint handling changes, the
 | 
					 * to make sure that if the tracepoint handling changes, the
 | 
				
			||||||
 * bpf probe will fail to compile unless it too is updated.
 | 
					 * bpf probe will fail to compile unless it too is updated.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#undef DEFINE_EVENT
 | 
					#define __DEFINE_EVENT(template, call, proto, args, size)		\
 | 
				
			||||||
#define DEFINE_EVENT(template, call, proto, args)			\
 | 
					 | 
				
			||||||
static inline void bpf_test_probe_##call(void)				\
 | 
					static inline void bpf_test_probe_##call(void)				\
 | 
				
			||||||
{									\
 | 
					{									\
 | 
				
			||||||
	check_trace_callback_type_##call(__bpf_trace_##template);	\
 | 
						check_trace_callback_type_##call(__bpf_trace_##template);	\
 | 
				
			||||||
| 
						 | 
					@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = {						\
 | 
				
			||||||
	.tp		= &__tracepoint_##call,				\
 | 
						.tp		= &__tracepoint_##call,				\
 | 
				
			||||||
	.bpf_func	= (void *)__bpf_trace_##template,		\
 | 
						.bpf_func	= (void *)__bpf_trace_##template,		\
 | 
				
			||||||
	.num_args	= COUNT_ARGS(args),				\
 | 
						.num_args	= COUNT_ARGS(args),				\
 | 
				
			||||||
 | 
						.writable_size	= size,						\
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define FIRST(x, ...) x
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#undef DEFINE_EVENT_WRITABLE
 | 
				
			||||||
 | 
					#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size)	\
 | 
				
			||||||
 | 
					static inline void bpf_test_buffer_##call(void)				\
 | 
				
			||||||
 | 
					{									\
 | 
				
			||||||
 | 
						/* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \
 | 
				
			||||||
 | 
						 * BUILD_BUG_ON_ZERO() uses a different mechanism that is not	\
 | 
				
			||||||
 | 
						 * dead-code-eliminated.					\
 | 
				
			||||||
 | 
						 */								\
 | 
				
			||||||
 | 
						FIRST(proto);							\
 | 
				
			||||||
 | 
						(void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args)));		\
 | 
				
			||||||
 | 
					}									\
 | 
				
			||||||
 | 
					__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#undef DEFINE_EVENT
 | 
				
			||||||
 | 
					#define DEFINE_EVENT(template, call, proto, args)			\
 | 
				
			||||||
 | 
						__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#undef DEFINE_EVENT_PRINT
 | 
					#undef DEFINE_EVENT_PRINT
 | 
				
			||||||
#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
 | 
					#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
 | 
				
			||||||
	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 | 
						DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 | 
					#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#undef DEFINE_EVENT_WRITABLE
 | 
				
			||||||
 | 
					#undef __DEFINE_EVENT
 | 
				
			||||||
 | 
					#undef FIRST
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif /* CONFIG_BPF_EVENTS */
 | 
					#endif /* CONFIG_BPF_EVENTS */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -168,6 +168,7 @@ enum bpf_prog_type {
 | 
				
			||||||
	BPF_PROG_TYPE_SK_REUSEPORT,
 | 
						BPF_PROG_TYPE_SK_REUSEPORT,
 | 
				
			||||||
	BPF_PROG_TYPE_FLOW_DISSECTOR,
 | 
						BPF_PROG_TYPE_FLOW_DISSECTOR,
 | 
				
			||||||
	BPF_PROG_TYPE_CGROUP_SYSCTL,
 | 
						BPF_PROG_TYPE_CGROUP_SYSCTL,
 | 
				
			||||||
 | 
						BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
enum bpf_attach_type {
 | 
					enum bpf_attach_type {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1789,12 +1789,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	raw_tp->btp = btp;
 | 
						raw_tp->btp = btp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
 | 
						prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
 | 
				
			||||||
				 BPF_PROG_TYPE_RAW_TRACEPOINT);
 | 
					 | 
				
			||||||
	if (IS_ERR(prog)) {
 | 
						if (IS_ERR(prog)) {
 | 
				
			||||||
		err = PTR_ERR(prog);
 | 
							err = PTR_ERR(prog);
 | 
				
			||||||
		goto out_free_tp;
 | 
							goto out_free_tp;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
 | 
				
			||||||
 | 
						    prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
 | 
				
			||||||
 | 
							err = -EINVAL;
 | 
				
			||||||
 | 
							goto out_put_prog;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	err = bpf_probe_register(raw_tp->btp, prog);
 | 
						err = bpf_probe_register(raw_tp->btp, prog);
 | 
				
			||||||
	if (err)
 | 
						if (err)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -405,6 +405,7 @@ static const char * const reg_type_str[] = {
 | 
				
			||||||
	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
 | 
						[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
 | 
				
			||||||
	[PTR_TO_TCP_SOCK]	= "tcp_sock",
 | 
						[PTR_TO_TCP_SOCK]	= "tcp_sock",
 | 
				
			||||||
	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
 | 
						[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
 | 
				
			||||||
 | 
						[PTR_TO_TP_BUFFER]	= "tp_buffer",
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static char slot_type_char[] = {
 | 
					static char slot_type_char[] = {
 | 
				
			||||||
| 
						 | 
					@ -1993,6 +1994,32 @@ static int check_ctx_reg(struct bpf_verifier_env *env,
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int check_tp_buffer_access(struct bpf_verifier_env *env,
 | 
				
			||||||
 | 
									  const struct bpf_reg_state *reg,
 | 
				
			||||||
 | 
									  int regno, int off, int size)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (off < 0) {
 | 
				
			||||||
 | 
							verbose(env,
 | 
				
			||||||
 | 
								"R%d invalid tracepoint buffer access: off=%d, size=%d",
 | 
				
			||||||
 | 
								regno, off, size);
 | 
				
			||||||
 | 
							return -EACCES;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
 | 
				
			||||||
 | 
							char tn_buf[48];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 | 
				
			||||||
 | 
							verbose(env,
 | 
				
			||||||
 | 
								"R%d invalid variable buffer offset: off=%d, var_off=%s",
 | 
				
			||||||
 | 
								regno, off, tn_buf);
 | 
				
			||||||
 | 
							return -EACCES;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if (off + size > env->prog->aux->max_tp_access)
 | 
				
			||||||
 | 
							env->prog->aux->max_tp_access = off + size;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* truncate register to smaller size (in bytes)
 | 
					/* truncate register to smaller size (in bytes)
 | 
				
			||||||
 * must be called with size < BPF_REG_SIZE
 | 
					 * must be called with size < BPF_REG_SIZE
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
| 
						 | 
					@ -2137,6 +2164,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 | 
				
			||||||
		err = check_sock_access(env, insn_idx, regno, off, size, t);
 | 
							err = check_sock_access(env, insn_idx, regno, off, size, t);
 | 
				
			||||||
		if (!err && value_regno >= 0)
 | 
							if (!err && value_regno >= 0)
 | 
				
			||||||
			mark_reg_unknown(env, regs, value_regno);
 | 
								mark_reg_unknown(env, regs, value_regno);
 | 
				
			||||||
 | 
						} else if (reg->type == PTR_TO_TP_BUFFER) {
 | 
				
			||||||
 | 
							err = check_tp_buffer_access(env, reg, regno, off, size);
 | 
				
			||||||
 | 
							if (!err && t == BPF_READ && value_regno >= 0)
 | 
				
			||||||
 | 
								mark_reg_unknown(env, regs, value_regno);
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		verbose(env, "R%d invalid mem access '%s'\n", regno,
 | 
							verbose(env, "R%d invalid mem access '%s'\n", regno,
 | 
				
			||||||
			reg_type_str[reg->type]);
 | 
								reg_type_str[reg->type]);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -915,6 +915,27 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
 | 
				
			||||||
const struct bpf_prog_ops raw_tracepoint_prog_ops = {
 | 
					const struct bpf_prog_ops raw_tracepoint_prog_ops = {
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static bool raw_tp_writable_prog_is_valid_access(int off, int size,
 | 
				
			||||||
 | 
											 enum bpf_access_type type,
 | 
				
			||||||
 | 
											 const struct bpf_prog *prog,
 | 
				
			||||||
 | 
											 struct bpf_insn_access_aux *info)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (off == 0) {
 | 
				
			||||||
 | 
							if (size != sizeof(u64) || type != BPF_READ)
 | 
				
			||||||
 | 
								return false;
 | 
				
			||||||
 | 
							info->reg_type = PTR_TO_TP_BUFFER;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return raw_tp_prog_is_valid_access(off, size, type, prog, info);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
 | 
				
			||||||
 | 
						.get_func_proto  = raw_tp_prog_func_proto,
 | 
				
			||||||
 | 
						.is_valid_access = raw_tp_writable_prog_is_valid_access,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
 | 
					static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
 | 
				
			||||||
				    const struct bpf_prog *prog,
 | 
									    const struct bpf_prog *prog,
 | 
				
			||||||
				    struct bpf_insn_access_aux *info)
 | 
									    struct bpf_insn_access_aux *info)
 | 
				
			||||||
| 
						 | 
					@ -1204,6 +1225,9 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
 | 
				
			||||||
	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
 | 
						if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
 | 
				
			||||||
		return -EINVAL;
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (prog->aux->max_tp_access > btp->writable_size)
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
 | 
						return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue