mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	Klara Modin reported warnings for a kernel configured with BPF_JIT but
without MODULES:
[   44.131296] Trying to vfree() bad address (000000004a17c299)
[   44.138024] WARNING: CPU: 1 PID: 193 at mm/vmalloc.c:3189 remove_vm_area (mm/vmalloc.c:3189 (discriminator 1))
[   44.146675] CPU: 1 PID: 193 Comm: kworker/1:2 Tainted: G      D W          6.9.0-01786-g2c9e5d4a0082 #25
[   44.158229] Hardware name: Raspberry Pi 3 Model B (DT)
[   44.164433] Workqueue: events bpf_prog_free_deferred
[   44.170492] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[   44.178601] pc : remove_vm_area (mm/vmalloc.c:3189 (discriminator 1))
[   44.183705] lr : remove_vm_area (mm/vmalloc.c:3189 (discriminator 1))
[   44.188772] sp : ffff800082a13c70
[   44.193112] x29: ffff800082a13c70 x28: 0000000000000000 x27: 0000000000000000
[   44.201384] x26: 0000000000000000 x25: ffff00003a44efa0 x24: 00000000d4202000
[   44.209658] x23: ffff800081223dd0 x22: ffff00003a198a40 x21: ffff8000814dd880
[   44.217924] x20: 00000000d4202000 x19: ffff8000814dd880 x18: 0000000000000006
[   44.226206] x17: 0000000000000000 x16: 0000000000000020 x15: 0000000000000002
[   44.234460] x14: ffff8000811a6370 x13: 0000000020000000 x12: 0000000000000000
[   44.242710] x11: ffff8000811a6370 x10: 0000000000000144 x9 : ffff8000811fe370
[   44.250959] x8 : 0000000000017fe8 x7 : 00000000fffff000 x6 : ffff8000811fe370
[   44.259206] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000
[   44.267457] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000002203240
[   44.275703] Call trace:
[   44.279158] remove_vm_area (mm/vmalloc.c:3189 (discriminator 1))
[   44.283858] vfree (mm/vmalloc.c:3322)
[   44.287835] execmem_free (mm/execmem.c:70)
[   44.292347] bpf_jit_free_exec+0x10/0x1c
[   44.297283] bpf_prog_pack_free (kernel/bpf/core.c:1006)
[   44.302457] bpf_jit_binary_pack_free (kernel/bpf/core.c:1195)
[   44.307951] bpf_jit_free (include/linux/filter.h:1083 arch/arm64/net/bpf_jit_comp.c:2474)
[   44.312342] bpf_prog_free_deferred (kernel/bpf/core.c:2785)
[   44.317785] process_one_work (kernel/workqueue.c:3273)
[   44.322684] worker_thread (kernel/workqueue.c:3342 (discriminator 2) kernel/workqueue.c:3429 (discriminator 2))
[   44.327292] kthread (kernel/kthread.c:388)
[   44.331342] ret_from_fork (arch/arm64/kernel/entry.S:861)
The problem is because bpf_arch_text_copy() silently fails to write to the
read-only area as a result of patch_map() faulting and the resulting
-EFAULT being chucked away.
Update patch_map() to use CONFIG_EXECMEM instead of
CONFIG_STRICT_MODULE_RWX to check for vmalloc addresses.
Link: https://lkml.kernel.org/r/20240521213813.703309-1-rppt@kernel.org
Fixes: 2c9e5d4a00 ("bpf: remove CONFIG_BPF_JIT dependency on CONFIG_MODULES of")
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org>
Reported-by: Klara Modin <klarasmodin@gmail.com>
Closes: https://lore.kernel.org/all/7983fbbf-0127-457c-9394-8d6e4299c685@gmail.com
Tested-by: Klara Modin <klarasmodin@gmail.com>
Cc: Björn Töpel <bjorn@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
		
	
			
		
			
				
	
	
		
			242 lines
		
	
	
	
		
			5.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			242 lines
		
	
	
	
		
			5.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
// SPDX-License-Identifier: GPL-2.0-only
 | 
						|
#include <linux/kernel.h>
 | 
						|
#include <linux/mm.h>
 | 
						|
#include <linux/smp.h>
 | 
						|
#include <linux/spinlock.h>
 | 
						|
#include <linux/stop_machine.h>
 | 
						|
#include <linux/uaccess.h>
 | 
						|
 | 
						|
#include <asm/cacheflush.h>
 | 
						|
#include <asm/fixmap.h>
 | 
						|
#include <asm/insn.h>
 | 
						|
#include <asm/kprobes.h>
 | 
						|
#include <asm/patching.h>
 | 
						|
#include <asm/sections.h>
 | 
						|
 | 
						|
/*
 * Held (with IRQs disabled) by every writer in this file for the whole
 * patch_map(FIX_TEXT_POKE0) ... patch_unmap(FIX_TEXT_POKE0) sequence, so
 * only one user of that fixmap slot is active at a time.
 */
static DEFINE_RAW_SPINLOCK(patch_lock);
 | 
						|
 | 
						|
static bool is_exit_text(unsigned long addr)
 | 
						|
{
 | 
						|
	/* discarded with init text/data */
 | 
						|
	return system_state < SYSTEM_RUNNING &&
 | 
						|
		addr >= (unsigned long)__exittext_begin &&
 | 
						|
		addr < (unsigned long)__exittext_end;
 | 
						|
}
 | 
						|
 | 
						|
static bool is_image_text(unsigned long addr)
 | 
						|
{
 | 
						|
	return core_kernel_text(addr) || is_exit_text(addr);
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Obtain an address through which the text at @addr can be written.
 *
 * - Image text (see is_image_text()) is resolved to its page via
 *   __pa_symbol().
 * - Otherwise, when CONFIG_EXECMEM is enabled, @addr is treated as a vmalloc
 *   address and resolved with vmalloc_to_page().
 * - Otherwise @addr is returned unchanged and no fixmap entry is installed.
 *
 * In the first two cases the page is mapped at fixmap slot @fixmap and the
 * returned pointer keeps @addr's offset within the page. The caller is
 * expected to undo the mapping with patch_unmap().
 */
static void __kprobes *patch_map(void *addr, int fixmap)
{
	unsigned long va = (uintptr_t)addr;
	struct page *pg;

	if (is_image_text(va)) {
		pg = phys_to_page(__pa_symbol(addr));
	} else {
		if (!IS_ENABLED(CONFIG_EXECMEM))
			return addr;

		pg = vmalloc_to_page(addr);
	}

	/* No backing page for an address we were asked to patch. */
	BUG_ON(!pg);

	return (void *)set_fixmap_offset(fixmap,
					 page_to_phys(pg) + (va & ~PAGE_MASK));
}
 | 
						|
 | 
						|
/* Tear down the fixmap entry in slot @fixmap (counterpart of patch_map()). */
static void __kprobes patch_unmap(int fixmap)
{
	clear_fixmap(fixmap);
}
 | 
						|
/*
 | 
						|
 * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
 | 
						|
 * little-endian.
 | 
						|
 */
 | 
						|
int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
 | 
						|
{
 | 
						|
	int ret;
 | 
						|
	__le32 val;
 | 
						|
 | 
						|
	ret = copy_from_kernel_nofault(&val, addr, AARCH64_INSN_SIZE);
 | 
						|
	if (!ret)
 | 
						|
		*insnp = le32_to_cpu(val);
 | 
						|
 | 
						|
	return ret;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Store the already little-endian instruction @insn at @addr.
 *
 * The target is made reachable through patch_map() on FIX_TEXT_POKE0 and
 * written with copy_to_kernel_nofault(), all under patch_lock with IRQs
 * disabled. Returns the result of copy_to_kernel_nofault().
 */
static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
{
	unsigned long irqflags;
	void *alias;
	int err;

	raw_spin_lock_irqsave(&patch_lock, irqflags);

	alias = patch_map(addr, FIX_TEXT_POKE0);
	err = copy_to_kernel_nofault(alias, &insn, AARCH64_INSN_SIZE);
	patch_unmap(FIX_TEXT_POKE0);

	raw_spin_unlock_irqrestore(&patch_lock, irqflags);

	return err;
}
 | 
						|
 | 
						|
/*
 * Write the CPU-endian instruction @insn at @addr, converting it to the
 * little-endian in-memory encoding first. Returns the result of
 * __aarch64_insn_write().
 */
int __kprobes aarch64_insn_write(void *addr, u32 insn)
{
	__le32 encoded = cpu_to_le32(insn);

	return __aarch64_insn_write(addr, encoded);
}
 | 
						|
 | 
						|
/*
 * Write the 64-bit value @val at @addr.
 *
 * Uses the same patch_map()/copy_to_kernel_nofault()/patch_unmap() sequence
 * as the instruction writers, under patch_lock with IRQs disabled. Returns
 * the result of copy_to_kernel_nofault().
 */
noinstr int aarch64_insn_write_literal_u64(void *addr, u64 val)
{
	unsigned long irqflags;
	u64 *alias;
	int err;

	raw_spin_lock_irqsave(&patch_lock, irqflags);

	alias = patch_map(addr, FIX_TEXT_POKE0);
	err = copy_to_kernel_nofault(alias, &val, sizeof(val));
	patch_unmap(FIX_TEXT_POKE0);

	raw_spin_unlock_irqrestore(&patch_lock, irqflags);

	return err;
}
 | 
						|
 | 
						|
typedef void text_poke_f(void *dst, void *src, size_t patched, size_t len);
 | 
						|
 | 
						|
static void *__text_poke(text_poke_f func, void *addr, void *src, size_t len)
 | 
						|
{
 | 
						|
	unsigned long flags;
 | 
						|
	size_t patched = 0;
 | 
						|
	size_t size;
 | 
						|
	void *waddr;
 | 
						|
	void *ptr;
 | 
						|
 | 
						|
	raw_spin_lock_irqsave(&patch_lock, flags);
 | 
						|
 | 
						|
	while (patched < len) {
 | 
						|
		ptr = addr + patched;
 | 
						|
		size = min_t(size_t, PAGE_SIZE - offset_in_page(ptr),
 | 
						|
			     len - patched);
 | 
						|
 | 
						|
		waddr = patch_map(ptr, FIX_TEXT_POKE0);
 | 
						|
		func(waddr, src, patched, size);
 | 
						|
		patch_unmap(FIX_TEXT_POKE0);
 | 
						|
 | 
						|
		patched += size;
 | 
						|
	}
 | 
						|
	raw_spin_unlock_irqrestore(&patch_lock, flags);
 | 
						|
 | 
						|
	flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len);
 | 
						|
 | 
						|
	return addr;
 | 
						|
}
 | 
						|
 | 
						|
static void text_poke_memcpy(void *dst, void *src, size_t patched, size_t len)
 | 
						|
{
 | 
						|
	copy_to_kernel_nofault(dst, src + patched, len);
 | 
						|
}
 | 
						|
 | 
						|
static void text_poke_memset(void *dst, void *src, size_t patched, size_t len)
 | 
						|
{
 | 
						|
	u32 c = *(u32 *)src;
 | 
						|
 | 
						|
	memset32(dst, c, len / 4);
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * aarch64_insn_copy - Copy instructions into (an unused part of) RX memory
 | 
						|
 * @dst: address to modify
 | 
						|
 * @src: source of the copy
 | 
						|
 * @len: length to copy
 | 
						|
 *
 | 
						|
 * Useful for JITs to dump new code blocks into unused regions of RX memory.
 | 
						|
 */
 | 
						|
noinstr void *aarch64_insn_copy(void *dst, void *src, size_t len)
 | 
						|
{
 | 
						|
	/* A64 instructions must be word aligned */
 | 
						|
	if ((uintptr_t)dst & 0x3)
 | 
						|
		return NULL;
 | 
						|
 | 
						|
	return __text_poke(text_poke_memcpy, dst, src, len);
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * aarch64_insn_set - memset for RX memory regions.
 | 
						|
 * @dst: address to modify
 | 
						|
 * @insn: value to set
 | 
						|
 * @len: length of memory region.
 | 
						|
 *
 | 
						|
 * Useful for JITs to fill regions of RX memory with illegal instructions.
 | 
						|
 */
 | 
						|
noinstr void *aarch64_insn_set(void *dst, u32 insn, size_t len)
 | 
						|
{
 | 
						|
	if ((uintptr_t)dst & 0x3)
 | 
						|
		return NULL;
 | 
						|
 | 
						|
	return __text_poke(text_poke_memset, dst, &insn, len);
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Replace the instruction at @addr with @insn without synchronising with
 * other CPUs.
 *
 * Returns -EINVAL if @addr is not 4-byte aligned, the error from
 * aarch64_insn_write() if the write fails, and 0 otherwise. Cache
 * maintenance on the patched word is only performed after a successful
 * write.
 */
int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
{
	u32 *slot = addr;
	int err;

	/* A64 instructions must be word aligned */
	if ((uintptr_t)slot & 0x3)
		return -EINVAL;

	err = aarch64_insn_write(slot, insn);
	if (err)
		return err;

	caches_clean_inval_pou((uintptr_t)slot,
			       (uintptr_t)slot + AARCH64_INSN_SIZE);

	return 0;
}
 | 
						|
 | 
						|
struct aarch64_insn_patch {
 | 
						|
	void		**text_addrs;
 | 
						|
	u32		*new_insns;
 | 
						|
	int		insn_cnt;
 | 
						|
	atomic_t	cpu_count;
 | 
						|
};
 | 
						|
 | 
						|
/*
 * Callback run on each CPU by aarch64_insn_patch_text().
 *
 * Every CPU bumps cpu_count on entry. The CPU whose increment reaches
 * num_online_cpus() (i.e. the last one to arrive) applies all patches,
 * stopping at the first failure, and then bumps cpu_count once more. All
 * other CPUs spin until they observe that extra increment, then execute an
 * ISB and return 0.
 *
 * Returns the patching result on the patching CPU and 0 on the others.
 */
static int __kprobes aarch64_insn_patch_text_cb(void *arg)
{
	struct aarch64_insn_patch *patch = arg;
	int idx, err = 0;

	if (atomic_inc_return(&patch->cpu_count) != num_online_cpus()) {
		/* Not the last CPU in: wait for the master to finish. */
		while (atomic_read(&patch->cpu_count) <= num_online_cpus())
			cpu_relax();
		isb();

		return 0;
	}

	/* The last CPU becomes master */
	for (idx = 0; idx < patch->insn_cnt; idx++) {
		err = aarch64_insn_patch_text_nosync(patch->text_addrs[idx],
						     patch->new_insns[idx]);
		if (err)
			break;
	}

	/* Notify other processors with an additional increment. */
	atomic_inc(&patch->cpu_count);

	return err;
}
 | 
						|
 | 
						|
/*
 * Patch @cnt instructions, writing insns[i] at addrs[i], by running
 * aarch64_insn_patch_text_cb() via stop_machine_cpuslocked() on the CPUs in
 * cpu_online_mask.
 *
 * Returns -EINVAL if @cnt is not positive, otherwise the result of
 * stop_machine_cpuslocked().
 */
int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
{
	struct aarch64_insn_patch request = {
		.text_addrs = addrs,
		.new_insns = insns,
		.insn_cnt = cnt,
		.cpu_count = ATOMIC_INIT(0),
	};

	if (cnt > 0)
		return stop_machine_cpuslocked(aarch64_insn_patch_text_cb,
					       &request, cpu_online_mask);

	return -EINVAL;
}
 |