	bpf: Add per-program recursion prevention mechanism
Since both sleepable and non-sleepable programs execute under
migrate_disable, add a recursion prevention mechanism to both types of
programs when they're executed via the BPF trampoline.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210210033634.62081-5-alexei.starovoitov@gmail.com
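The guard itself is a per-CPU counter (the new prog->active field added below): each trampoline entry increments it and, if it was already non-zero on this CPU, the program body is skipped. As an illustration of the case being prevented (the function names here are hypothetical, not from the patch):

	/* fentry prog P attached to some_kernel_func():
	 *
	 *   some_kernel_func()
	 *     __bpf_prog_enter(P)         P->active on this CPU: 0 -> 1, P runs
	 *       P ends up re-entering some_kernel_func() on the same CPU
	 *         __bpf_prog_enter(P)     P->active: 1 -> 2, returns 0
	 *         P is not run again -- recursion prevented
	 *         __bpf_prog_exit(P, 0)   P->active: 2 -> 1
	 *     __bpf_prog_exit(P, start)   P->active: 1 -> 0
	 */

Because both program types run under migrate_disable, the task cannot change CPUs between enter and exit, which is what makes a per-CPU counter a reliable recursion detector.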
parent f2dd3b3946
commit ca06f55b90

7 changed files with 50 additions and 11 deletions
arch/x86/net/bpf_jit_comp.c

@@ -1740,8 +1740,11 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 			   struct bpf_prog *p, int stack_size, bool mod_ret)
 {
 	u8 *prog = *pprog;
+	u8 *jmp_insn;
 	int cnt = 0;
 
+	/* arg1: mov rdi, progs[i] */
+	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
 	if (emit_call(&prog,
 		      p->aux->sleepable ? __bpf_prog_enter_sleepable :
 		      __bpf_prog_enter, prog))
@@ -1749,6 +1752,14 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	/* remember prog start time returned by __bpf_prog_enter */
 	emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
 
+	/* if (__bpf_prog_enter*(prog) == 0)
+	 *	goto skip_exec_of_prog;
+	 */
+	EMIT3(0x48, 0x85, 0xC0);  /* test rax,rax */
+	/* emit 2 nops that will be replaced with JE insn */
+	jmp_insn = prog;
+	emit_nops(&prog, 2);
+
 	/* arg1: lea rdi, [rbp - stack_size] */
 	EMIT4(0x48, 0x8D, 0x7D, -stack_size);
 	/* arg2: progs[i]->insnsi for interpreter */
@@ -1767,6 +1778,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	if (mod_ret)
 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
 
+	/* replace 2 nops with JE insn, since jmp target is known */
+	jmp_insn[0] = X86_JE;
+	jmp_insn[1] = prog - jmp_insn - 2;
+
 	/* arg1: mov rdi, progs[i] */
 	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
 	/* arg2: mov rsi, rbx <- start time in nsec */
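Rendered as C, the stanza emitted above for each program p behaves roughly as follows. This is a sketch of the generated control flow, not literal kernel source; ctx, ret and ret_slot stand in for the trampoline's registers and stack slots:

	start = p->aux->sleepable ? __bpf_prog_enter_sleepable(p)
				  : __bpf_prog_enter(p);  /* result kept in rbx */
	if (start == 0)                 /* test rax,rax + the patched JE */
		goto skip_exec_of_prog;
	ret = p->bpf_func(ctx, p->insnsi);  /* ctx at rbp - stack_size */
	if (mod_ret)                    /* BPF_MODIFY_RETURN programs */
		ret_slot = ret;         /* emit_stx: stored at rbp - 8 */
skip_exec_of_prog:
	if (p->aux->sleepable)
		__bpf_prog_exit_sleepable(p, start);
	else
		__bpf_prog_exit(p, start);

Note that the JE targets the exit call rather than jumping past it, so prog->active is decremented even when execution was skipped.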
include/linux/bpf.h

@@ -529,7 +529,7 @@ struct btf_func_model {
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
  * bytes on x86.  Pick a number to fit into BPF_IMAGE_SIZE / 2
  */
-#define BPF_MAX_TRAMP_PROGS 40
+#define BPF_MAX_TRAMP_PROGS 38
 
 struct bpf_tramp_progs {
 	struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS];
@@ -561,9 +561,9 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
 				struct bpf_tramp_progs *tprogs,
 				void *orig_call);
 /* these two functions are called from generated trampoline */
-u64 notrace __bpf_prog_enter(void);
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog);
 void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
-u64 notrace __bpf_prog_enter_sleepable(void);
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog);
 void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
 
 struct bpf_ksym {
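The 40 -> 38 drop follows from the JIT change above: each per-program stanza gains roughly 15 bytes on x86-64, so fewer stanzas fit in the fixed BPF_IMAGE_SIZE / 2 budget. The new bytes, per program:

	mov rdi, progs[i]   10 bytes   (mov imm64 added before __bpf_prog_enter)
	test rax, rax        3 bytes   (EMIT3(0x48, 0x85, 0xC0))
	je skip              2 bytes   (the two patched nops)

The selftest limits later in this commit are updated to the same value.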
include/linux/filter.h

@@ -565,6 +565,7 @@ struct bpf_prog {
 	u32			jited_len;	/* Size of jited insns in bytes */
 	u8			tag[BPF_TAG_SIZE];
 	struct bpf_prog_stats __percpu *stats;
+	int __percpu		*active;
 	unsigned int		(*bpf_func)(const void *ctx,
 					    const struct bpf_insn *insn);
 	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
kernel/bpf/core.c

@@ -91,6 +91,12 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 		vfree(fp);
 		return NULL;
 	}
+	fp->active = alloc_percpu_gfp(int, GFP_KERNEL_ACCOUNT | gfp_extra_flags);
+	if (!fp->active) {
+		vfree(fp);
+		kfree(aux);
+		return NULL;
+	}
 
 	fp->pages = size / PAGE_SIZE;
 	fp->aux = aux;
@@ -116,6 +122,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 
 	prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
 	if (!prog->stats) {
+		free_percpu(prog->active);
 		kfree(prog->aux);
 		vfree(prog);
 		return NULL;
@@ -253,6 +260,7 @@ void __bpf_prog_free(struct bpf_prog *fp)
 		kfree(fp->aux);
 	}
 	free_percpu(fp->stats);
+	free_percpu(fp->active);
 	vfree(fp);
 }
 
kernel/bpf/trampoline.c

@@ -381,13 +381,16 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
 	mutex_unlock(&trampoline_mutex);
 }
 
-#define NO_START_TIME 0
+#define NO_START_TIME 1
 static u64 notrace bpf_prog_start_time(void)
 {
 	u64 start = NO_START_TIME;
 
-	if (static_branch_unlikely(&bpf_stats_enabled_key))
+	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
 		start = sched_clock();
+		if (unlikely(!start))
+			start = NO_START_TIME;
+	}
 	return start;
 }
 
@@ -397,12 +400,20 @@ static u64 notrace bpf_prog_start_time(void)
  * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
+ *
+ * __bpf_prog_enter returns:
+ * 0 - skip execution of the bpf prog
+ * 1 - execute bpf prog
+ * [2..MAX_U64] - execute bpf prog and record execution time.
+ *     This is start time.
 */
-u64 notrace __bpf_prog_enter(void)
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
 	__acquires(RCU)
 {
 	rcu_read_lock();
 	migrate_disable();
+	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1))
+		return 0;
 	return bpf_prog_start_time();
 }
 
@@ -430,21 +441,25 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
 	__releases(RCU)
 {
 	update_prog_stats(prog, start);
+	__this_cpu_dec(*(prog->active));
 	migrate_enable();
 	rcu_read_unlock();
 }
 
-u64 notrace __bpf_prog_enter_sleepable(void)
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
 {
 	rcu_read_lock_trace();
 	migrate_disable();
 	might_fault();
+	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1))
+		return 0;
 	return bpf_prog_start_time();
 }
 
 void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
 {
 	update_prog_stats(prog, start);
+	__this_cpu_dec(*(prog->active));
 	migrate_enable();
 	rcu_read_unlock_trace();
 }
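A subtlety in the hunks above: 0 is now reserved as the "recursion detected" signal, so NO_START_TIME moves from 0 to 1 and bpf_prog_start_time() remaps a (theoretically possible) sched_clock() reading of 0. The u64 handed back to the trampoline thus encodes, per the new comment block:

	/* 0           -> skip the prog (but still call __bpf_prog_exit*)
	 * 1           -> run the prog without recording a start timestamp
	 * 2..MAX_U64  -> run the prog; the value is the start timestamp
	 */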
tools/testing/selftests/bpf/prog_tests/fexit_stress.c

@@ -2,8 +2,8 @@
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
 
-/* x86-64 fits 55 JITed and 43 interpreted progs into half page */
-#define CNT 40
+/* that's kernel internal BPF_MAX_TRAMP_PROGS define */
+#define CNT 38
 
 void test_fexit_stress(void)
 {
tools/testing/selftests/bpf/prog_tests/trampoline_count.c

@@ -4,7 +4,7 @@
 #include <sys/prctl.h>
 #include <test_progs.h>
 
-#define MAX_TRAMP_PROGS 40
+#define MAX_TRAMP_PROGS 38
 
 struct inst {
 	struct bpf_object *obj;
@@ -52,7 +52,7 @@ void test_trampoline_count(void)
 	struct bpf_link *link;
 	char comm[16] = {};
 
-	/* attach 'allowed' 40 trampoline programs */
+	/* attach 'allowed' trampoline programs */
 	for (i = 0; i < MAX_TRAMP_PROGS; i++) {
 		obj = bpf_object__open_file(object, NULL);
 		if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {