mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	bpf: Add x86-64 JIT support for PROBE_MEM32 pseudo instructions.
Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW] instructions.
They are similar to PROBE_MEM instructions with the following differences:
- PROBE_MEM has to check that the address is in the kernel range with
  src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE check
- PROBE_MEM doesn't support store
- PROBE_MEM32 relies on the verifier to clear upper 32-bit in the register
- PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in %r12 in the prologue).
  Due to bpf_arena constructions such %r12 + %reg + off16 access is guaranteed
  to be within arena virtual range, so no address check at run-time.
- PROBE_MEM32 allows STX and ST. If they fault the store is a nop.
  When LDX faults the destination register is zeroed.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/bpf/20240308010812.89848-4-alexei.starovoitov@gmail.com
This commit is contained in:
		
							parent
							
								
									667a86ad9b
								
							
						
					
					
						commit
						2fe99eb0cc
					
				
					 3 changed files with 194 additions and 1 deletions
				
			
		| 
						 | 
				
			
			@ -113,6 +113,7 @@ static int bpf_size_to_x86_bytes(int bpf_size)
 | 
			
		|||
/* Pick a register outside of BPF range for JIT internal work */
 | 
			
		||||
#define AUX_REG (MAX_BPF_JIT_REG + 1)
 | 
			
		||||
#define X86_REG_R9 (MAX_BPF_JIT_REG + 2)
 | 
			
		||||
#define X86_REG_R12 (MAX_BPF_JIT_REG + 3)
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * The following table maps BPF registers to x86-64 registers.
 | 
			
		||||
| 
						 | 
				
			
			@ -139,6 +140,7 @@ static const int reg2hex[] = {
 | 
			
		|||
	[BPF_REG_AX] = 2, /* R10 temp register */
 | 
			
		||||
	[AUX_REG] = 3,    /* R11 temp register */
 | 
			
		||||
	[X86_REG_R9] = 1, /* R9 register, 6th function argument */
 | 
			
		||||
	[X86_REG_R12] = 4, /* R12 callee saved */
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static const int reg2pt_regs[] = {
 | 
			
		||||
| 
						 | 
				
			
			@ -167,6 +169,7 @@ static bool is_ereg(u32 reg)
 | 
			
		|||
			     BIT(BPF_REG_8) |
 | 
			
		||||
			     BIT(BPF_REG_9) |
 | 
			
		||||
			     BIT(X86_REG_R9) |
 | 
			
		||||
			     BIT(X86_REG_R12) |
 | 
			
		||||
			     BIT(BPF_REG_AX));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -205,6 +208,17 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2)
 | 
			
		|||
	return byte;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Fold register-extension bits for a three-register operand into 'byte'.
 *
 * Bit 0 is set when 'r1' is an extended register, bit 1 when 'index' is,
 * and bit 2 when 'r2' is (as reported by is_ereg()). Callers in this file
 * pass 0x40 or 0x48 as 'byte', i.e. presumably a REX prefix whose B/X/R
 * bits are being filled in.
 *
 * Returns the updated byte.
 */
static u8 add_3mod(u8 byte, u32 r1, u32 r2, u32 index)
{
	u8 ext = 0;

	ext |= is_ereg(r1) ? 1 : 0;
	ext |= is_ereg(index) ? 2 : 0;
	ext |= is_ereg(r2) ? 4 : 0;

	return byte | ext;
}
 | 
			
		||||
 | 
			
		||||
/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
 | 
			
		||||
static u8 add_1reg(u8 byte, u32 dst_reg)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -645,6 +659,8 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
 | 
			
		|||
		pop_r12(&prog);
 | 
			
		||||
	} else {
 | 
			
		||||
		pop_callee_regs(&prog, callee_regs_used);
 | 
			
		||||
		if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
 | 
			
		||||
			pop_r12(&prog);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	EMIT1(0x58);                              /* pop rax */
 | 
			
		||||
| 
						 | 
				
			
			@ -704,6 +720,8 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
 | 
			
		|||
		pop_r12(&prog);
 | 
			
		||||
	} else {
 | 
			
		||||
		pop_callee_regs(&prog, callee_regs_used);
 | 
			
		||||
		if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
 | 
			
		||||
			pop_r12(&prog);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	EMIT1(0x58);                                  /* pop rax */
 | 
			
		||||
| 
						 | 
				
			
			@ -887,6 +905,18 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
 | 
			
		|||
	*pprog = prog;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Emit the trailing bytes of an instruction whose memory operand is
 * [ptr_reg + index_reg + off]: one byte built from 'val_reg', one SIB byte
 * built from 'ptr_reg' and 'index_reg', and then the offset.
 *
 * An offset accepted by is_imm8() is emitted as a single byte (0x44 form);
 * anything else is emitted as a 32-bit offset (0x84 form).
 *
 * @pprog:     in/out cursor into the instruction buffer
 * @ptr_reg:   register holding the pointer part of the address
 * @val_reg:   register operand being loaded/stored
 * @index_reg: register added to ptr_reg to form the address
 * @off:       constant offset
 */
static void emit_insn_suffix_SIB(u8 **pprog, u32 ptr_reg, u32 val_reg, u32 index_reg, int off)
{
	u8 *prog = *pprog;
	/* SIB byte is the same for both offset widths */
	u8 sib = add_2reg(0, ptr_reg, index_reg);

	if (!is_imm8(off)) {
		/* offset needs the 32-bit form */
		EMIT2_off32(add_2reg(0x84, BPF_REG_0, val_reg), sib, off);
	} else {
		/* offset fits in one byte */
		EMIT3(add_2reg(0x44, BPF_REG_0, val_reg), sib, off);
	}

	*pprog = prog;
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Emit a REX byte if it will be necessary to address these registers
 | 
			
		||||
 */
 | 
			
		||||
| 
						 | 
				
			
			@ -968,6 +998,37 @@ static void emit_ldsx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 | 
			
		|||
	*pprog = prog;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * LDX: dst_reg = *(size *)(src_reg + index_reg + off)
 *
 * Emits the prefix and opcode for a load of 'size' (BPF_B, BPF_H, BPF_W or
 * BPF_DW) and then the addressing bytes via emit_insn_suffix_SIB(). For any
 * other 'size' no opcode is emitted, only the suffix.
 */
static void emit_ldx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
{
	u8 *prog = *pprog;
	/* 0x48 base for the 64-bit load, 0x40 for the narrower ones */
	u8 rex = add_3mod(size == BPF_DW ? 0x48 : 0x40, src_reg, dst_reg, index_reg);

	switch (size) {
	case BPF_B:
		/* movzx rax, byte ptr [rax + r12 + off] */
		EMIT3(rex, 0x0F, 0xB6);
		break;
	case BPF_H:
		/* movzx rax, word ptr [rax + r12 + off] */
		EMIT3(rex, 0x0F, 0xB7);
		break;
	case BPF_W:
		/* mov eax, dword ptr [rax + r12 + off] */
	case BPF_DW:
		/* mov rax, qword ptr [rax + r12 + off] */
		EMIT2(rex, 0x8B);
		break;
	}

	emit_insn_suffix_SIB(&prog, src_reg, dst_reg, index_reg, off);
	*pprog = prog;
}
 | 
			
		||||
 | 
			
		||||
/* LDX with R12 as the index: dst_reg = *(size *)(src_reg + r12 + off) */
static void emit_ldx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
	const u32 index_reg = X86_REG_R12;

	emit_ldx_index(pprog, size, dst_reg, src_reg, index_reg, off);
}
 | 
			
		||||
 | 
			
		||||
/* STX: *(u8*)(dst_reg + off) = src_reg */
 | 
			
		||||
static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -1002,6 +1063,71 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 | 
			
		|||
	*pprog = prog;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* STX: *(u8*)(dst_reg + index_reg + off) = src_reg */
 | 
			
		||||
static void emit_stx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
 | 
			
		||||
{
 | 
			
		||||
	u8 *prog = *pprog;
 | 
			
		||||
 | 
			
		||||
	switch (size) {
 | 
			
		||||
	case BPF_B:
 | 
			
		||||
		/* mov byte ptr [rax + r12 + off], al */
 | 
			
		||||
		EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x88);
 | 
			
		||||
		break;
 | 
			
		||||
	case BPF_H:
 | 
			
		||||
		/* mov word ptr [rax + r12 + off], ax */
 | 
			
		||||
		EMIT3(0x66, add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89);
 | 
			
		||||
		break;
 | 
			
		||||
	case BPF_W:
 | 
			
		||||
		/* mov dword ptr [rax + r12 + 1], eax */
 | 
			
		||||
		EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89);
 | 
			
		||||
		break;
 | 
			
		||||
	case BPF_DW:
 | 
			
		||||
		/* mov qword ptr [rax + r12 + 1], rax */
 | 
			
		||||
		EMIT2(add_3mod(0x48, dst_reg, src_reg, index_reg), 0x89);
 | 
			
		||||
		break;
 | 
			
		||||
	}
 | 
			
		||||
	emit_insn_suffix_SIB(&prog, dst_reg, src_reg, index_reg, off);
 | 
			
		||||
	*pprog = prog;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* STX with R12 as the index: *(size *)(dst_reg + r12 + off) = src_reg */
static void emit_stx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
	const u32 index_reg = X86_REG_R12;

	emit_stx_index(pprog, size, dst_reg, src_reg, index_reg, off);
}
 | 
			
		||||
 | 
			
		||||
/* ST: *(u8*)(dst_reg + index_reg + off) = imm32 */
 | 
			
		||||
static void emit_st_index(u8 **pprog, u32 size, u32 dst_reg, u32 index_reg, int off, int imm)
 | 
			
		||||
{
 | 
			
		||||
	u8 *prog = *pprog;
 | 
			
		||||
 | 
			
		||||
	switch (size) {
 | 
			
		||||
	case BPF_B:
 | 
			
		||||
		/* mov byte ptr [rax + r12 + off], imm8 */
 | 
			
		||||
		EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC6);
 | 
			
		||||
		break;
 | 
			
		||||
	case BPF_H:
 | 
			
		||||
		/* mov word ptr [rax + r12 + off], imm16 */
 | 
			
		||||
		EMIT3(0x66, add_3mod(0x40, dst_reg, 0, index_reg), 0xC7);
 | 
			
		||||
		break;
 | 
			
		||||
	case BPF_W:
 | 
			
		||||
		/* mov dword ptr [rax + r12 + 1], imm32 */
 | 
			
		||||
		EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC7);
 | 
			
		||||
		break;
 | 
			
		||||
	case BPF_DW:
 | 
			
		||||
		/* mov qword ptr [rax + r12 + 1], imm32 */
 | 
			
		||||
		EMIT2(add_3mod(0x48, dst_reg, 0, index_reg), 0xC7);
 | 
			
		||||
		break;
 | 
			
		||||
	}
 | 
			
		||||
	emit_insn_suffix_SIB(&prog, dst_reg, 0, index_reg, off);
 | 
			
		||||
	EMIT(imm, bpf_size_to_x86_bytes(size));
 | 
			
		||||
	*pprog = prog;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* ST with R12 as the index: *(size *)(dst_reg + r12 + off) = imm */
static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm)
{
	const u32 index_reg = X86_REG_R12;

	emit_st_index(pprog, size, dst_reg, index_reg, off, imm);
}
 | 
			
		||||
 | 
			
		||||
static int emit_atomic(u8 **pprog, u8 atomic_op,
 | 
			
		||||
		       u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -1043,12 +1169,15 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
 | 
			
		|||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define DONT_CLEAR 1
 | 
			
		||||
 | 
			
		||||
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
 | 
			
		||||
{
 | 
			
		||||
	u32 reg = x->fixup >> 8;
 | 
			
		||||
 | 
			
		||||
	/* jump over faulting load and clear dest register */
 | 
			
		||||
	*(unsigned long *)((void *)regs + reg) = 0;
 | 
			
		||||
	if (reg != DONT_CLEAR)
 | 
			
		||||
		*(unsigned long *)((void *)regs + reg) = 0;
 | 
			
		||||
	regs->ip += x->fixup & 0xff;
 | 
			
		||||
	return true;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1147,11 +1276,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 | 
			
		|||
	bool tail_call_seen = false;
 | 
			
		||||
	bool seen_exit = false;
 | 
			
		||||
	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
 | 
			
		||||
	u64 arena_vm_start;
 | 
			
		||||
	int i, excnt = 0;
 | 
			
		||||
	int ilen, proglen = 0;
 | 
			
		||||
	u8 *prog = temp;
 | 
			
		||||
	int err;
 | 
			
		||||
 | 
			
		||||
	arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
 | 
			
		||||
 | 
			
		||||
	detect_reg_usage(insn, insn_cnt, callee_regs_used,
 | 
			
		||||
			 &tail_call_seen);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1172,8 +1304,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 | 
			
		|||
		push_r12(&prog);
 | 
			
		||||
		push_callee_regs(&prog, all_callee_regs_used);
 | 
			
		||||
	} else {
 | 
			
		||||
		if (arena_vm_start)
 | 
			
		||||
			push_r12(&prog);
 | 
			
		||||
		push_callee_regs(&prog, callee_regs_used);
 | 
			
		||||
	}
 | 
			
		||||
	if (arena_vm_start)
 | 
			
		||||
		emit_mov_imm64(&prog, X86_REG_R12,
 | 
			
		||||
			       arena_vm_start >> 32, (u32) arena_vm_start);
 | 
			
		||||
 | 
			
		||||
	ilen = prog - temp;
 | 
			
		||||
	if (rw_image)
 | 
			
		||||
| 
						 | 
				
			
			@ -1564,6 +1701,56 @@ st:			if (is_imm8(insn->off))
 | 
			
		|||
			emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
		case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
 | 
			
		||||
		case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
 | 
			
		||||
		case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
 | 
			
		||||
		case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
 | 
			
		||||
			start_of_ldx = prog;
 | 
			
		||||
			emit_st_r12(&prog, BPF_SIZE(insn->code), dst_reg, insn->off, insn->imm);
 | 
			
		||||
			goto populate_extable;
 | 
			
		||||
 | 
			
		||||
			/* LDX: dst_reg = *(u8*)(src_reg + r12 + off) */
 | 
			
		||||
		case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
 | 
			
		||||
		case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
 | 
			
		||||
		case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
 | 
			
		||||
		case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
 | 
			
		||||
		case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
 | 
			
		||||
		case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
 | 
			
		||||
		case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
 | 
			
		||||
		case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
 | 
			
		||||
			start_of_ldx = prog;
 | 
			
		||||
			if (BPF_CLASS(insn->code) == BPF_LDX)
 | 
			
		||||
				emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
 | 
			
		||||
			else
 | 
			
		||||
				emit_stx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
 | 
			
		||||
populate_extable:
 | 
			
		||||
			{
 | 
			
		||||
				struct exception_table_entry *ex;
 | 
			
		||||
				u8 *_insn = image + proglen + (start_of_ldx - temp);
 | 
			
		||||
				s64 delta;
 | 
			
		||||
 | 
			
		||||
				if (!bpf_prog->aux->extable)
 | 
			
		||||
					break;
 | 
			
		||||
 | 
			
		||||
				if (excnt >= bpf_prog->aux->num_exentries) {
 | 
			
		||||
					pr_err("mem32 extable bug\n");
 | 
			
		||||
					return -EFAULT;
 | 
			
		||||
				}
 | 
			
		||||
				ex = &bpf_prog->aux->extable[excnt++];
 | 
			
		||||
 | 
			
		||||
				delta = _insn - (u8 *)&ex->insn;
 | 
			
		||||
				/* switch ex to rw buffer for writes */
 | 
			
		||||
				ex = (void *)rw_image + ((void *)ex - (void *)image);
 | 
			
		||||
 | 
			
		||||
				ex->insn = delta;
 | 
			
		||||
 | 
			
		||||
				ex->data = EX_TYPE_BPF;
 | 
			
		||||
 | 
			
		||||
				ex->fixup = (prog - start_of_ldx) |
 | 
			
		||||
					((BPF_CLASS(insn->code) == BPF_LDX ? reg2pt_regs[dst_reg] : DONT_CLEAR) << 8);
 | 
			
		||||
			}
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
			/* LDX: dst_reg = *(u8*)(src_reg + off) */
 | 
			
		||||
		case BPF_LDX | BPF_MEM | BPF_B:
 | 
			
		||||
		case BPF_LDX | BPF_PROBE_MEM | BPF_B:
 | 
			
		||||
| 
						 | 
				
			
			@ -2036,6 +2223,8 @@ st:			if (is_imm8(insn->off))
 | 
			
		|||
				pop_r12(&prog);
 | 
			
		||||
			} else {
 | 
			
		||||
				pop_callee_regs(&prog, callee_regs_used);
 | 
			
		||||
				if (arena_vm_start)
 | 
			
		||||
					pop_r12(&prog);
 | 
			
		||||
			}
 | 
			
		||||
			EMIT1(0xC9);         /* leave */
 | 
			
		||||
			emit_return(&prog, image + addrs[i - 1] + (prog - temp));
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1458,6 +1458,7 @@ struct bpf_prog_aux {
 | 
			
		|||
	bool xdp_has_frags;
 | 
			
		||||
	bool exception_cb;
 | 
			
		||||
	bool exception_boundary;
 | 
			
		||||
	struct bpf_arena *arena;
 | 
			
		||||
	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
 | 
			
		||||
	const struct btf_type *attach_func_proto;
 | 
			
		||||
	/* function name for valid attach_btf_id */
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -72,6 +72,9 @@ struct ctl_table_header;
 | 
			
		|||
/* unused opcode to mark special ldsx instruction. Same as BPF_IND */
 | 
			
		||||
#define BPF_PROBE_MEMSX	0x40
 | 
			
		||||
 | 
			
		||||
/* unused opcode to mark special load instruction. Same as BPF_MSH */
 | 
			
		||||
#define BPF_PROBE_MEM32	0xa0
 | 
			
		||||
 | 
			
		||||
/* unused opcode to mark call to interpreter with arguments */
 | 
			
		||||
#define BPF_CALL_ARGS	0xe0
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue