mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 16:48:26 +02:00 
			
		
		
		
	bpf: Augment raw_tp arguments with PTR_MAYBE_NULL
Arguments to a raw tracepoint are tagged as trusted, which carries the semantics that the pointer will be non-NULL. However, in certain cases, a raw tracepoint argument may end up being NULL. More context about this issue is available in [0]. Thus, there is a discrepancy between the reality, that raw_tp arguments can actually be NULL, and the verifier's knowledge, that they are never NULL, causing explicit NULL check branch to be dead code eliminated. A previous attempt [1], i.e. the second fixed commit, was made to simulate symbolic execution as if in most accesses, the argument is a non-NULL raw_tp, except for conditional jumps. This tried to suppress branch prediction while preserving compatibility, but surfaced issues with production programs that were difficult to solve without increasing verifier complexity. A more complete discussion of issues and fixes is available at [2]. Fix this by maintaining an explicit list of tracepoints where the arguments are known to be NULL, and mark the positional arguments as PTR_MAYBE_NULL. Additionally, capture the tracepoints where arguments are known to be ERR_PTR, and mark these arguments as scalar values to prevent potential dereference. Each hex digit is used to encode NULL-ness (0x1) or ERR_PTR-ness (0x2), shifted by the zero-indexed argument number x 4. This can be represented as follows: 1st arg: 0x1 2nd arg: 0x10 3rd arg: 0x100 ... and so on (likewise for ERR_PTR case). In the future, an automated pass will be used to produce such a list, or insert __nullable annotations automatically for tracepoints. Each compilation unit will be analyzed and results will be collated to find whether a tracepoint pointer is definitely not null, maybe null, or an unknown state where verifier conservatively marks it PTR_MAYBE_NULL. A proof of concept of this tool from Eduard is available at [3]. Note that in case we don't find a specification in the raw_tp_null_args array and the tracepoint belongs to a kernel module, we will conservatively mark the arguments as PTR_MAYBE_NULL. This is because unlike for in-tree modules, out-of-tree module tracepoints may pass NULL freely to the tracepoint. We don't protect against such tracepoints passing ERR_PTR (which is uncommon anyway), lest we mark all such arguments as SCALAR_VALUE. While we are it, let's adjust the test raw_tp_null to not perform dereference of the skb->mark, as that won't be allowed anymore, and make it more robust by using inline assembly to test the dead code elimination behavior, which should still stay the same. [0]: https://lore.kernel.org/bpf/ZrCZS6nisraEqehw@jlelli-thinkpadt14gen4.remote.csb [1]: https://lore.kernel.org/all/20241104171959.2938862-1-memxor@gmail.com [2]: https://lore.kernel.org/bpf/20241206161053.809580-1-memxor@gmail.com [3]: https://github.com/eddyz87/llvm-project/tree/nullness-for-tracepoint-params Reported-by: Juri Lelli <juri.lelli@redhat.com> # original bug Reported-by: Manu Bretelle <chantra@meta.com> # bugs in masking fix Fixes:3f00c52393("bpf: Allow trusted pointers to be passed to KF_TRUSTED_ARGS kfuncs") Fixes:cb4158ce8e("bpf: Mark raw_tp arguments with PTR_MAYBE_NULL") Reviewed-by: Eduard Zingerman <eddyz87@gmail.com> Co-developed-by: Jiri Olsa <jolsa@kernel.org> Signed-off-by: Jiri Olsa <jolsa@kernel.org> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Link: https://lore.kernel.org/r/20241213221929.3495062-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
		
							parent
							
								
									c00d738e16
								
							
						
					
					
						commit
						838a10bd2e
					
				
					 2 changed files with 147 additions and 10 deletions
				
			
		
							
								
								
									
										138
									
								
								kernel/bpf/btf.c
									
									
									
									
									
								
							
							
						
						
									
										138
									
								
								kernel/bpf/btf.c
									
									
									
									
									
								
							|  | @ -6439,6 +6439,101 @@ int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto, | |||
| 	return off; | ||||
| } | ||||
| 
 | ||||
| struct bpf_raw_tp_null_args { | ||||
| 	const char *func; | ||||
| 	u64 mask; | ||||
| }; | ||||
| 
 | ||||
| static const struct bpf_raw_tp_null_args raw_tp_null_args[] = { | ||||
| 	/* sched */ | ||||
| 	{ "sched_pi_setprio", 0x10 }, | ||||
| 	/* ... from sched_numa_pair_template event class */ | ||||
| 	{ "sched_stick_numa", 0x100 }, | ||||
| 	{ "sched_swap_numa", 0x100 }, | ||||
| 	/* afs */ | ||||
| 	{ "afs_make_fs_call", 0x10 }, | ||||
| 	{ "afs_make_fs_calli", 0x10 }, | ||||
| 	{ "afs_make_fs_call1", 0x10 }, | ||||
| 	{ "afs_make_fs_call2", 0x10 }, | ||||
| 	{ "afs_protocol_error", 0x1 }, | ||||
| 	{ "afs_flock_ev", 0x10 }, | ||||
| 	/* cachefiles */ | ||||
| 	{ "cachefiles_lookup", 0x1 | 0x200 }, | ||||
| 	{ "cachefiles_unlink", 0x1 }, | ||||
| 	{ "cachefiles_rename", 0x1 }, | ||||
| 	{ "cachefiles_prep_read", 0x1 }, | ||||
| 	{ "cachefiles_mark_active", 0x1 }, | ||||
| 	{ "cachefiles_mark_failed", 0x1 }, | ||||
| 	{ "cachefiles_mark_inactive", 0x1 }, | ||||
| 	{ "cachefiles_vfs_error", 0x1 }, | ||||
| 	{ "cachefiles_io_error", 0x1 }, | ||||
| 	{ "cachefiles_ondemand_open", 0x1 }, | ||||
| 	{ "cachefiles_ondemand_copen", 0x1 }, | ||||
| 	{ "cachefiles_ondemand_close", 0x1 }, | ||||
| 	{ "cachefiles_ondemand_read", 0x1 }, | ||||
| 	{ "cachefiles_ondemand_cread", 0x1 }, | ||||
| 	{ "cachefiles_ondemand_fd_write", 0x1 }, | ||||
| 	{ "cachefiles_ondemand_fd_release", 0x1 }, | ||||
| 	/* ext4, from ext4__mballoc event class */ | ||||
| 	{ "ext4_mballoc_discard", 0x10 }, | ||||
| 	{ "ext4_mballoc_free", 0x10 }, | ||||
| 	/* fib */ | ||||
| 	{ "fib_table_lookup", 0x100 }, | ||||
| 	/* filelock */ | ||||
| 	/* ... from filelock_lock event class */ | ||||
| 	{ "posix_lock_inode", 0x10 }, | ||||
| 	{ "fcntl_setlk", 0x10 }, | ||||
| 	{ "locks_remove_posix", 0x10 }, | ||||
| 	{ "flock_lock_inode", 0x10 }, | ||||
| 	/* ... from filelock_lease event class */ | ||||
| 	{ "break_lease_noblock", 0x10 }, | ||||
| 	{ "break_lease_block", 0x10 }, | ||||
| 	{ "break_lease_unblock", 0x10 }, | ||||
| 	{ "generic_delete_lease", 0x10 }, | ||||
| 	{ "time_out_leases", 0x10 }, | ||||
| 	/* host1x */ | ||||
| 	{ "host1x_cdma_push_gather", 0x10000 }, | ||||
| 	/* huge_memory */ | ||||
| 	{ "mm_khugepaged_scan_pmd", 0x10 }, | ||||
| 	{ "mm_collapse_huge_page_isolate", 0x1 }, | ||||
| 	{ "mm_khugepaged_scan_file", 0x10 }, | ||||
| 	{ "mm_khugepaged_collapse_file", 0x10 }, | ||||
| 	/* kmem */ | ||||
| 	{ "mm_page_alloc", 0x1 }, | ||||
| 	{ "mm_page_pcpu_drain", 0x1 }, | ||||
| 	/* .. from mm_page event class */ | ||||
| 	{ "mm_page_alloc_zone_locked", 0x1 }, | ||||
| 	/* netfs */ | ||||
| 	{ "netfs_failure", 0x10 }, | ||||
| 	/* power */ | ||||
| 	{ "device_pm_callback_start", 0x10 }, | ||||
| 	/* qdisc */ | ||||
| 	{ "qdisc_dequeue", 0x1000 }, | ||||
| 	/* rxrpc */ | ||||
| 	{ "rxrpc_recvdata", 0x1 }, | ||||
| 	{ "rxrpc_resend", 0x10 }, | ||||
| 	/* sunrpc */ | ||||
| 	{ "xs_stream_read_data", 0x1 }, | ||||
| 	/* ... from xprt_cong_event event class */ | ||||
| 	{ "xprt_reserve_cong", 0x10 }, | ||||
| 	{ "xprt_release_cong", 0x10 }, | ||||
| 	{ "xprt_get_cong", 0x10 }, | ||||
| 	{ "xprt_put_cong", 0x10 }, | ||||
| 	/* tcp */ | ||||
| 	{ "tcp_send_reset", 0x11 }, | ||||
| 	/* tegra_apb_dma */ | ||||
| 	{ "tegra_dma_tx_status", 0x100 }, | ||||
| 	/* timer_migration */ | ||||
| 	{ "tmigr_update_events", 0x1 }, | ||||
| 	/* writeback, from writeback_folio_template event class */ | ||||
| 	{ "writeback_dirty_folio", 0x10 }, | ||||
| 	{ "folio_wait_writeback", 0x10 }, | ||||
| 	/* rdma */ | ||||
| 	{ "mr_integ_alloc", 0x2000 }, | ||||
| 	/* bpf_testmod */ | ||||
| 	{ "bpf_testmod_test_read", 0x0 }, | ||||
| }; | ||||
| 
 | ||||
| bool btf_ctx_access(int off, int size, enum bpf_access_type type, | ||||
| 		    const struct bpf_prog *prog, | ||||
| 		    struct bpf_insn_access_aux *info) | ||||
|  | @ -6449,6 +6544,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, | |||
| 	const char *tname = prog->aux->attach_func_name; | ||||
| 	struct bpf_verifier_log *log = info->log; | ||||
| 	const struct btf_param *args; | ||||
| 	bool ptr_err_raw_tp = false; | ||||
| 	const char *tag_value; | ||||
| 	u32 nr_args, arg; | ||||
| 	int i, ret; | ||||
|  | @ -6597,6 +6693,39 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, | |||
| 	if (btf_param_match_suffix(btf, &args[arg], "__nullable")) | ||||
| 		info->reg_type |= PTR_MAYBE_NULL; | ||||
| 
 | ||||
| 	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) { | ||||
| 		struct btf *btf = prog->aux->attach_btf; | ||||
| 		const struct btf_type *t; | ||||
| 		const char *tname; | ||||
| 
 | ||||
| 		/* BTF lookups cannot fail, return false on error */ | ||||
| 		t = btf_type_by_id(btf, prog->aux->attach_btf_id); | ||||
| 		if (!t) | ||||
| 			return false; | ||||
| 		tname = btf_name_by_offset(btf, t->name_off); | ||||
| 		if (!tname) | ||||
| 			return false; | ||||
| 		/* Checked by bpf_check_attach_target */ | ||||
| 		tname += sizeof("btf_trace_") - 1; | ||||
| 		for (i = 0; i < ARRAY_SIZE(raw_tp_null_args); i++) { | ||||
| 			/* Is this a func with potential NULL args? */ | ||||
| 			if (strcmp(tname, raw_tp_null_args[i].func)) | ||||
| 				continue; | ||||
| 			if (raw_tp_null_args[i].mask & (0x1 << (arg * 4))) | ||||
| 				info->reg_type |= PTR_MAYBE_NULL; | ||||
| 			/* Is the current arg IS_ERR? */ | ||||
| 			if (raw_tp_null_args[i].mask & (0x2 << (arg * 4))) | ||||
| 				ptr_err_raw_tp = true; | ||||
| 			break; | ||||
| 		} | ||||
| 		/* If we don't know NULL-ness specification and the tracepoint
 | ||||
| 		 * is coming from a loadable module, be conservative and mark | ||||
| 		 * argument as PTR_MAYBE_NULL. | ||||
| 		 */ | ||||
| 		if (i == ARRAY_SIZE(raw_tp_null_args) && btf_is_module(btf)) | ||||
| 			info->reg_type |= PTR_MAYBE_NULL; | ||||
| 	} | ||||
| 
 | ||||
| 	if (tgt_prog) { | ||||
| 		enum bpf_prog_type tgt_type; | ||||
| 
 | ||||
|  | @ -6641,6 +6770,15 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, | |||
| 	bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n", | ||||
| 		tname, arg, info->btf_id, btf_type_str(t), | ||||
| 		__btf_name_by_offset(btf, t->name_off)); | ||||
| 
 | ||||
| 	/* Perform all checks on the validity of type for this argument, but if
 | ||||
| 	 * we know it can be IS_ERR at runtime, scrub pointer type and mark as | ||||
| 	 * scalar. | ||||
| 	 */ | ||||
| 	if (ptr_err_raw_tp) { | ||||
| 		bpf_log(log, "marking pointer arg%d as scalar as it may encode error", arg); | ||||
| 		info->reg_type = SCALAR_VALUE; | ||||
| 	} | ||||
| 	return true; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(btf_ctx_access); | ||||
|  |  | |||
|  | @ -3,6 +3,7 @@ | |||
| 
 | ||||
| #include <vmlinux.h> | ||||
| #include <bpf/bpf_tracing.h> | ||||
| #include "bpf_misc.h" | ||||
| 
 | ||||
| char _license[] SEC("license") = "GPL"; | ||||
| 
 | ||||
|  | @ -17,16 +18,14 @@ int BPF_PROG(test_raw_tp_null, struct sk_buff *skb) | |||
| 	if (task->pid != tid) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	i = i + skb->mark + 1; | ||||
| 	/* The compiler may move the NULL check before this deref, which causes
 | ||||
| 	 * the load to fail as deref of scalar. Prevent that by using a barrier. | ||||
| 	/* If dead code elimination kicks in, the increment +=2 will be
 | ||||
| 	 * removed. For raw_tp programs attaching to tracepoints in kernel | ||||
| 	 * modules, we mark input arguments as PTR_MAYBE_NULL, so branch | ||||
| 	 * prediction should never kick in. | ||||
| 	 */ | ||||
| 	barrier(); | ||||
| 	/* If dead code elimination kicks in, the increment below will
 | ||||
| 	 * be removed. For raw_tp programs, we mark input arguments as | ||||
| 	 * PTR_MAYBE_NULL, so branch prediction should never kick in. | ||||
| 	 */ | ||||
| 	if (!skb) | ||||
| 		i += 2; | ||||
| 	asm volatile ("%[i] += 1; if %[ctx] != 0 goto +1; %[i] += 2;" | ||||
| 			: [i]"+r"(i) | ||||
| 			: [ctx]"r"(skb) | ||||
| 			: "memory"); | ||||
| 	return 0; | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Kumar Kartikeya Dwivedi
						Kumar Kartikeya Dwivedi