mirror of https://github.com/torvalds/linux.git (synced 2025-11-04 02:30:34 +02:00)
	bpf, x86: Create bpf_tramp_run_ctx on the caller thread's stack
BPF trampolines will create a bpf_tramp_run_ctx, a bpf_run_ctx, on the caller
thread's stack and set/reset the current bpf_run_ctx before/after calling a
bpf_prog.

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220510205923.3206889-3-kuifeng@fb.com
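In outline, the change makes every trampoline invocation behave like the
following C sketch, condensed from the hunks below. It is illustration only,
not code the JIT emits verbatim: the real frame is built by emitted x86
instructions, and the enter/exit helpers also handle RCU, migration, stats,
and recursion counting.

/* Condensed sketch of what a generated trampoline does after this patch. */
static u64 trampoline_sketch(struct bpf_prog *prog, void *args)
{
	/* Lives in the trampoline frame at [rbp - run_ctx_off]. */
	struct bpf_tramp_run_ctx run_ctx;
	u64 start;

	/* "mov rdi, 0; mov QWORD PTR [rbp - run_ctx_off + ctx_cookie_off], rdi" */
	run_ctx.bpf_cookie = 0;

	/* Installs &run_ctx.run_ctx as current->bpf_ctx and remembers the
	 * previous value in run_ctx.saved_run_ctx. Returns 0 when recursion
	 * is detected, the start time otherwise.
	 */
	start = __bpf_prog_enter(prog, &run_ctx);
	if (start)
		prog->bpf_func(args, prog->insnsi);

	/* Restores current->bpf_ctx from run_ctx.saved_run_ctx. */
	__bpf_prog_exit(prog, start, &run_ctx);
	return 0;
}

Because the run ctx is a stack slot rather than per-CPU or per-task allocated
state, nested trampolines on the same thread each get their own context, and
saved_run_ctx chains them back together.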
parent f7e0beaf39
commit e384c7b7b4

4 changed files with 66 additions and 19 deletions
arch/x86/net/bpf_jit_comp.c

@@ -1763,14 +1763,30 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
 
 static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 			   struct bpf_tramp_link *l, int stack_size,
-			   bool save_ret)
+			   int run_ctx_off, bool save_ret)
 {
 	u8 *prog = *pprog;
 	u8 *jmp_insn;
+	int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
 	struct bpf_prog *p = l->link.prog;
 
+	/* mov rdi, 0 */
+	emit_mov_imm64(&prog, BPF_REG_1, 0, 0);
+
+	/* Prepare struct bpf_tramp_run_ctx.
+	 *
+	 * bpf_tramp_run_ctx is already preserved by
+	 * arch_prepare_bpf_trampoline().
+	 *
+	 * mov QWORD PTR [rbp - run_ctx_off + ctx_cookie_off], rdi
+	 */
+	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off);
+
 	/* arg1: mov rdi, progs[i] */
 	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
+	/* arg2: lea rsi, [rbp - ctx_cookie_off] */
+	EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
+
 	if (emit_call(&prog,
 		      p->aux->sleepable ? __bpf_prog_enter_sleepable :
 		      __bpf_prog_enter, prog))

@@ -1816,6 +1832,8 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
 	/* arg2: mov rsi, rbx <- start time in nsec */
 	emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+	/* arg3: lea rdx, [rbp - run_ctx_off] */
+	EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
 	if (emit_call(&prog,
 		      p->aux->sleepable ? __bpf_prog_exit_sleepable :
 		      __bpf_prog_exit, prog))

@@ -1853,14 +1871,14 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
 
 static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
 		      struct bpf_tramp_links *tl, int stack_size,
-		      bool save_ret)
+		      int run_ctx_off, bool save_ret)
 {
 	int i;
 	u8 *prog = *pprog;
 
 	for (i = 0; i < tl->nr_links; i++) {
 		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
-				    save_ret))
+				    run_ctx_off, save_ret))
 			return -EINVAL;
 	}
 	*pprog = prog;

@@ -1869,7 +1887,7 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
 
 static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
 			      struct bpf_tramp_links *tl, int stack_size,
-			      u8 **branches)
+			      int run_ctx_off, u8 **branches)
 {
 	u8 *prog = *pprog;
 	int i;

@@ -1880,7 +1898,7 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
 	emit_mov_imm32(&prog, false, BPF_REG_0, 0);
 	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
 	for (i = 0; i < tl->nr_links; i++) {
-		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, true))
+		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true))
 			return -EINVAL;
 
 		/* mod_ret prog stored return value into [rbp - 8]. Emit:

@@ -1986,7 +2004,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 				void *orig_call)
 {
 	int ret, i, nr_args = m->nr_args;
-	int regs_off, ip_off, args_off, stack_size = nr_args * 8;
+	int regs_off, ip_off, args_off, stack_size = nr_args * 8, run_ctx_off;
 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];

@@ -2016,6 +2034,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	 * RBP - args_off  [ args count      ]  always
 	 *
 	 * RBP - ip_off    [ traced function ]  BPF_TRAMP_F_IP_ARG flag
+	 *
+	 * RBP - run_ctx_off [ bpf_tramp_run_ctx ]
 	 */
 
 	/* room for return value of orig_call or fentry prog */

@@ -2034,6 +2054,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 
 	ip_off = stack_size;
 
+	stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7;
+	run_ctx_off = stack_size;
+
 	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
 		/* skip patched call instruction and point orig_call to actual
 		 * body of the kernel function.

@@ -2081,7 +2104,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (fentry->nr_links)
-		if (invoke_bpf(m, &prog, fentry, regs_off,
+		if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
 			       flags & BPF_TRAMP_F_RET_FENTRY_RET))
 			return -EINVAL;
 

@@ -2092,7 +2115,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 			return -ENOMEM;
 
 		if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off,
-				       branches)) {
+				       run_ctx_off, branches)) {
 			ret = -EINVAL;
 			goto cleanup;
 		}

@@ -2129,7 +2152,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (fexit->nr_links)
-		if (invoke_bpf(m, &prog, fexit, regs_off, false)) {
+		if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, false)) {
 			ret = -EINVAL;
 			goto cleanup;
 		}
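One detail worth calling out from the arch_prepare_bpf_trampoline() hunks
above: the run-ctx slot is rounded up with the (size + 7) & ~0x7 mask so that
every rbp-relative slot in the trampoline frame stays 8-byte aligned. A
standalone illustration (the round_up8 helper name is mine, not the kernel's):

#include <stdio.h>

/* Same rounding the JIT applies when reserving the bpf_tramp_run_ctx
 * slot: round size up to the next multiple of 8.
 */
static int round_up8(int size)
{
	return (size + 7) & ~0x7;
}

int main(void)
{
	printf("%d\n", round_up8(1));  /* 8 */
	printf("%d\n", round_up8(17)); /* 24 */
	printf("%d\n", round_up8(24)); /* 24: already aligned, unchanged */
	return 0;
}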
include/linux/bpf.h

@@ -730,6 +730,8 @@ struct bpf_tramp_links {
 	int nr_links;
 };
 
+struct bpf_tramp_run_ctx;
+
 /* Different use cases for BPF trampoline:
  * 1. replace nop at the function entry (kprobe equivalent)
  *    flags = BPF_TRAMP_F_RESTORE_REGS

@@ -756,10 +758,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *i
 				struct bpf_tramp_links *tlinks,
 				void *orig_call);
 /* these two functions are called from generated trampoline */
-u64 notrace __bpf_prog_enter(struct bpf_prog *prog);
-void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
-u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog);
-void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx);
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
+				       struct bpf_tramp_run_ctx *run_ctx);
 void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
 void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
 

@@ -1351,6 +1354,12 @@ struct bpf_trace_run_ctx {
 	u64 bpf_cookie;
 };
 
+struct bpf_tramp_run_ctx {
+	struct bpf_run_ctx run_ctx;
+	u64 bpf_cookie;
+	struct bpf_run_ctx *saved_run_ctx;
+};
+
 static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
 {
 	struct bpf_run_ctx *old_ctx = NULL;
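The bpf_set_run_ctx()/bpf_reset_run_ctx() pair, whose opening lines appear in
the last hunk, simply swap the per-task current->bpf_ctx pointer. Roughly, as
the pre-existing helpers stood around this series (a sketch from the visible
context, not part of this patch's changes):

/* Rough shape of the existing helpers the new enter/exit paths rely on:
 * save and restore of the per-task current->bpf_ctx pointer.
 */
static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
{
	struct bpf_run_ctx *old_ctx = NULL;

#ifdef CONFIG_BPF_SYSCALL
	old_ctx = current->bpf_ctx;
	current->bpf_ctx = new_ctx;
#endif
	return old_ctx;
}

static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
{
#ifdef CONFIG_BPF_SYSCALL
	current->bpf_ctx = old_ctx;
#endif
}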
kernel/bpf/syscall.c

@@ -5020,6 +5020,7 @@ static bool syscall_prog_is_valid_access(int off, int size,
 BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
 {
 	struct bpf_prog * __maybe_unused prog;
+	struct bpf_tramp_run_ctx __maybe_unused run_ctx;
 
 	switch (cmd) {
 	case BPF_MAP_CREATE:

@@ -5047,13 +5048,15 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
 			return -EINVAL;
 		}
 
-		if (!__bpf_prog_enter_sleepable(prog)) {
+		run_ctx.bpf_cookie = 0;
+		run_ctx.saved_run_ctx = NULL;
+		if (!__bpf_prog_enter_sleepable(prog, &run_ctx)) {
 			/* recursion detected */
 			bpf_prog_put(prog);
 			return -EBUSY;
 		}
 		attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
-		__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */);
+		__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, &run_ctx);
 		bpf_prog_put(prog);
 		return 0;
 #endif
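bpf_sys_bpf() runs the program directly through bpf_prog_run() rather than
through a generated trampoline, so it has to seed the two run-ctx fields
itself before entering. With that done, anything consulting current->bpf_ctx
while the program runs sees a valid bpf_tramp_run_ctx. Illustratively
(read_tramp_cookie() is a hypothetical reader, not part of this patch):

#include <linux/bpf.h>
#include <linux/sched.h>

/* Hypothetical consumer, for illustration only: once the trampoline (or
 * bpf_sys_bpf) has installed the run ctx, the attach cookie can be
 * recovered from current->bpf_ctx.
 */
static u64 read_tramp_cookie(void)
{
	struct bpf_tramp_run_ctx *run_ctx;

	run_ctx = container_of(current->bpf_ctx,
			       struct bpf_tramp_run_ctx, run_ctx);
	return run_ctx->bpf_cookie; /* 0 for bpf_sys_bpf invocations */
}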
kernel/bpf/trampoline.c

@@ -568,11 +568,14 @@ static void notrace inc_misses_counter(struct bpf_prog *prog)
  * [2..MAX_U64] - execute bpf prog and record execution time.
  *     This is start time.
  */
-u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
 	__acquires(RCU)
 {
 	rcu_read_lock();
 	migrate_disable();
+
+	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
 	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
 		inc_misses_counter(prog);
 		return 0;

@@ -602,29 +605,38 @@ static void notrace update_prog_stats(struct bpf_prog *prog,
 	}
 }
 
-void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx)
 	__releases(RCU)
 {
+	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
 	update_prog_stats(prog, start);
 	__this_cpu_dec(*(prog->active));
 	migrate_enable();
 	rcu_read_unlock();
 }
 
-u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
 {
 	rcu_read_lock_trace();
 	migrate_disable();
 	might_fault();
+
 	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
 		inc_misses_counter(prog);
 		return 0;
 	}
+
+	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
 	return bpf_prog_start_time();
 }
 
-void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
+				       struct bpf_tramp_run_ctx *run_ctx)
{
+	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
 	update_prog_stats(prog, start);
 	__this_cpu_dec(*(prog->active));
 	migrate_enable();
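The contract visible in the two hunks above: whoever calls an enter helper
owns a bpf_tramp_run_ctx that must stay live until the matching exit helper
runs, because exit unconditionally restores saved_run_ctx from it. A
caller-side sketch mirroring the bpf_sys_bpf() hunk (run_sleepable() is my
name for the pattern, not a kernel function):

/* Sketch of the sleepable enter/exit calling convention added here;
 * the run_ctx must outlive the program run.
 */
static int run_sleepable(struct bpf_prog *prog, void *ctx)
{
	struct bpf_tramp_run_ctx run_ctx = {
		.bpf_cookie	= 0,	/* no attach cookie in this path */
		.saved_run_ctx	= NULL,
	};

	if (!__bpf_prog_enter_sleepable(prog, &run_ctx))
		return -EBUSY;	/* recursion detected */

	bpf_prog_run(prog, ctx);
	/* start == 0: bpf_prog_run already accounted runtime stats */
	__bpf_prog_exit_sleepable(prog, 0, &run_ctx);
	return 0;
}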
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue