mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 16:48:26 +02:00 
			
		
		
		
	bpf: Add bpf_core_add_cands() and wire it into bpf_core_apply_relo_insn().
Given BPF program's BTF root type name perform the following steps: . search in vmlinux candidate cache. . if (present in cache and candidate list >= 1) return candidate list. . do a linear search through kernel BTFs for possible candidates. . regardless of number of candidates found populate vmlinux cache. . if (candidate list >= 1) return candidate list. . search in module candidate cache. . if (present in cache) return candidate list (even if list is empty). . do a linear search through BTFs of all kernel modules collecting candidates from all of them. . regardless of number of candidates found populate module cache. . return candidate list. Then wire the result into bpf_core_apply_relo_insn(). When BPF program is trying to CO-RE relocate a type that doesn't exist in either vmlinux BTF or in modules BTFs these steps will perform 2 cache lookups when cache is hit. Note the cache doesn't prevent the abuse by the program that might have lots of relocations that cannot be resolved. Hence cond_resched(). CO-RE in the kernel requires CAP_BPF, since BTF loading requires it. Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Acked-by: Andrii Nakryiko <andrii@kernel.org> Link: https://lore.kernel.org/bpf/20211201181040.23337-9-alexei.starovoitov@gmail.com
This commit is contained in:
		
							parent
							
								
									03d5b99138
								
							
						
					
					
						commit
						1e89106da2
					
				
					 1 changed files with 345 additions and 1 deletions
				
			
		
							
								
								
									
										346
									
								
								kernel/bpf/btf.c
									
									
									
									
									
								
							
							
						
						
									
										346
									
								
								kernel/bpf/btf.c
									
									
									
									
									
								
							|  | @ -25,6 +25,7 @@ | |||
| #include <linux/kobject.h> | ||||
| #include <linux/sysfs.h> | ||||
| #include <net/sock.h> | ||||
| #include "../tools/lib/bpf/relo_core.h" | ||||
| 
 | ||||
| /* BTF (BPF Type Format) is the meta data format which describes
 | ||||
|  * the data types of BPF program/map.  Hence, it basically focus | ||||
|  | @ -6169,6 +6170,8 @@ btf_module_read(struct file *file, struct kobject *kobj, | |||
| 	return len; | ||||
| } | ||||
| 
 | ||||
| static void purge_cand_cache(struct btf *btf); | ||||
| 
 | ||||
| static int btf_module_notify(struct notifier_block *nb, unsigned long op, | ||||
| 			     void *module) | ||||
| { | ||||
|  | @ -6203,6 +6206,7 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, | |||
| 			goto out; | ||||
| 		} | ||||
| 
 | ||||
| 		purge_cand_cache(NULL); | ||||
| 		mutex_lock(&btf_module_mutex); | ||||
| 		btf_mod->module = module; | ||||
| 		btf_mod->btf = btf; | ||||
|  | @ -6245,6 +6249,7 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, | |||
| 			list_del(&btf_mod->list); | ||||
| 			if (btf_mod->sysfs_attr) | ||||
| 				sysfs_remove_bin_file(btf_kobj, btf_mod->sysfs_attr); | ||||
| 			purge_cand_cache(btf_mod->btf); | ||||
| 			btf_put(btf_mod->btf); | ||||
| 			kfree(btf_mod->sysfs_attr); | ||||
| 			kfree(btf_mod); | ||||
|  | @ -6440,8 +6445,347 @@ size_t bpf_core_essential_name_len(const char *name) | |||
| 	return n; | ||||
| } | ||||
| 
 | ||||
| struct bpf_cand_cache { | ||||
| 	const char *name; | ||||
| 	u32 name_len; | ||||
| 	u16 kind; | ||||
| 	u16 cnt; | ||||
| 	struct { | ||||
| 		const struct btf *btf; | ||||
| 		u32 id; | ||||
| 	} cands[]; | ||||
| }; | ||||
| 
 | ||||
| static void bpf_free_cands(struct bpf_cand_cache *cands) | ||||
| { | ||||
| 	if (!cands->cnt) | ||||
| 		/* empty candidate array was allocated on stack */ | ||||
| 		return; | ||||
| 	kfree(cands); | ||||
| } | ||||
| 
 | ||||
| static void bpf_free_cands_from_cache(struct bpf_cand_cache *cands) | ||||
| { | ||||
| 	kfree(cands->name); | ||||
| 	kfree(cands); | ||||
| } | ||||
| 
 | ||||
| #define VMLINUX_CAND_CACHE_SIZE 31 | ||||
| static struct bpf_cand_cache *vmlinux_cand_cache[VMLINUX_CAND_CACHE_SIZE]; | ||||
| 
 | ||||
| #define MODULE_CAND_CACHE_SIZE 31 | ||||
| static struct bpf_cand_cache *module_cand_cache[MODULE_CAND_CACHE_SIZE]; | ||||
| 
 | ||||
| static DEFINE_MUTEX(cand_cache_mutex); | ||||
| 
 | ||||
| static void __print_cand_cache(struct bpf_verifier_log *log, | ||||
| 			       struct bpf_cand_cache **cache, | ||||
| 			       int cache_size) | ||||
| { | ||||
| 	struct bpf_cand_cache *cc; | ||||
| 	int i, j; | ||||
| 
 | ||||
| 	for (i = 0; i < cache_size; i++) { | ||||
| 		cc = cache[i]; | ||||
| 		if (!cc) | ||||
| 			continue; | ||||
| 		bpf_log(log, "[%d]%s(", i, cc->name); | ||||
| 		for (j = 0; j < cc->cnt; j++) { | ||||
| 			bpf_log(log, "%d", cc->cands[j].id); | ||||
| 			if (j < cc->cnt - 1) | ||||
| 				bpf_log(log, " "); | ||||
| 		} | ||||
| 		bpf_log(log, "), "); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void print_cand_cache(struct bpf_verifier_log *log) | ||||
| { | ||||
| 	mutex_lock(&cand_cache_mutex); | ||||
| 	bpf_log(log, "vmlinux_cand_cache:"); | ||||
| 	__print_cand_cache(log, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); | ||||
| 	bpf_log(log, "\nmodule_cand_cache:"); | ||||
| 	__print_cand_cache(log, module_cand_cache, MODULE_CAND_CACHE_SIZE); | ||||
| 	bpf_log(log, "\n"); | ||||
| 	mutex_unlock(&cand_cache_mutex); | ||||
| } | ||||
| 
 | ||||
| static u32 hash_cands(struct bpf_cand_cache *cands) | ||||
| { | ||||
| 	return jhash(cands->name, cands->name_len, 0); | ||||
| } | ||||
| 
 | ||||
| static struct bpf_cand_cache *check_cand_cache(struct bpf_cand_cache *cands, | ||||
| 					       struct bpf_cand_cache **cache, | ||||
| 					       int cache_size) | ||||
| { | ||||
| 	struct bpf_cand_cache *cc = cache[hash_cands(cands) % cache_size]; | ||||
| 
 | ||||
| 	if (cc && cc->name_len == cands->name_len && | ||||
| 	    !strncmp(cc->name, cands->name, cands->name_len)) | ||||
| 		return cc; | ||||
| 	return NULL; | ||||
| } | ||||
| 
 | ||||
| static size_t sizeof_cands(int cnt) | ||||
| { | ||||
| 	return offsetof(struct bpf_cand_cache, cands[cnt]); | ||||
| } | ||||
| 
 | ||||
| static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands, | ||||
| 						  struct bpf_cand_cache **cache, | ||||
| 						  int cache_size) | ||||
| { | ||||
| 	struct bpf_cand_cache **cc = &cache[hash_cands(cands) % cache_size], *new_cands; | ||||
| 
 | ||||
| 	if (*cc) { | ||||
| 		bpf_free_cands_from_cache(*cc); | ||||
| 		*cc = NULL; | ||||
| 	} | ||||
| 	new_cands = kmalloc(sizeof_cands(cands->cnt), GFP_KERNEL); | ||||
| 	if (!new_cands) { | ||||
| 		bpf_free_cands(cands); | ||||
| 		return ERR_PTR(-ENOMEM); | ||||
| 	} | ||||
| 	memcpy(new_cands, cands, sizeof_cands(cands->cnt)); | ||||
| 	/* strdup the name, since it will stay in cache.
 | ||||
| 	 * the cands->name points to strings in prog's BTF and the prog can be unloaded. | ||||
| 	 */ | ||||
| 	new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL); | ||||
| 	bpf_free_cands(cands); | ||||
| 	if (!new_cands->name) { | ||||
| 		kfree(new_cands); | ||||
| 		return ERR_PTR(-ENOMEM); | ||||
| 	} | ||||
| 	*cc = new_cands; | ||||
| 	return new_cands; | ||||
| } | ||||
| 
 | ||||
| static void __purge_cand_cache(struct btf *btf, struct bpf_cand_cache **cache, | ||||
| 			       int cache_size) | ||||
| { | ||||
| 	struct bpf_cand_cache *cc; | ||||
| 	int i, j; | ||||
| 
 | ||||
| 	for (i = 0; i < cache_size; i++) { | ||||
| 		cc = cache[i]; | ||||
| 		if (!cc) | ||||
| 			continue; | ||||
| 		if (!btf) { | ||||
| 			/* when new module is loaded purge all of module_cand_cache,
 | ||||
| 			 * since new module might have candidates with the name | ||||
| 			 * that matches cached cands. | ||||
| 			 */ | ||||
| 			bpf_free_cands_from_cache(cc); | ||||
| 			cache[i] = NULL; | ||||
| 			continue; | ||||
| 		} | ||||
| 		/* when module is unloaded purge cache entries
 | ||||
| 		 * that match module's btf | ||||
| 		 */ | ||||
| 		for (j = 0; j < cc->cnt; j++) | ||||
| 			if (cc->cands[j].btf == btf) { | ||||
| 				bpf_free_cands_from_cache(cc); | ||||
| 				cache[i] = NULL; | ||||
| 				break; | ||||
| 			} | ||||
| 	} | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| static void purge_cand_cache(struct btf *btf) | ||||
| { | ||||
| 	mutex_lock(&cand_cache_mutex); | ||||
| 	__purge_cand_cache(btf, module_cand_cache, MODULE_CAND_CACHE_SIZE); | ||||
| 	mutex_unlock(&cand_cache_mutex); | ||||
| } | ||||
| 
 | ||||
| static struct bpf_cand_cache * | ||||
| bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf, | ||||
| 		   int targ_start_id) | ||||
| { | ||||
| 	struct bpf_cand_cache *new_cands; | ||||
| 	const struct btf_type *t; | ||||
| 	const char *targ_name; | ||||
| 	size_t targ_essent_len; | ||||
| 	int n, i; | ||||
| 
 | ||||
| 	n = btf_nr_types(targ_btf); | ||||
| 	for (i = targ_start_id; i < n; i++) { | ||||
| 		t = btf_type_by_id(targ_btf, i); | ||||
| 		if (btf_kind(t) != cands->kind) | ||||
| 			continue; | ||||
| 
 | ||||
| 		targ_name = btf_name_by_offset(targ_btf, t->name_off); | ||||
| 		if (!targ_name) | ||||
| 			continue; | ||||
| 
 | ||||
| 		/* the resched point is before strncmp to make sure that search
 | ||||
| 		 * for non-existing name will have a chance to schedule(). | ||||
| 		 */ | ||||
| 		cond_resched(); | ||||
| 
 | ||||
| 		if (strncmp(cands->name, targ_name, cands->name_len) != 0) | ||||
| 			continue; | ||||
| 
 | ||||
| 		targ_essent_len = bpf_core_essential_name_len(targ_name); | ||||
| 		if (targ_essent_len != cands->name_len) | ||||
| 			continue; | ||||
| 
 | ||||
| 		/* most of the time there is only one candidate for a given kind+name pair */ | ||||
| 		new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL); | ||||
| 		if (!new_cands) { | ||||
| 			bpf_free_cands(cands); | ||||
| 			return ERR_PTR(-ENOMEM); | ||||
| 		} | ||||
| 
 | ||||
| 		memcpy(new_cands, cands, sizeof_cands(cands->cnt)); | ||||
| 		bpf_free_cands(cands); | ||||
| 		cands = new_cands; | ||||
| 		cands->cands[cands->cnt].btf = targ_btf; | ||||
| 		cands->cands[cands->cnt].id = i; | ||||
| 		cands->cnt++; | ||||
| 	} | ||||
| 	return cands; | ||||
| } | ||||
| 
 | ||||
| static struct bpf_cand_cache * | ||||
| bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id) | ||||
| { | ||||
| 	struct bpf_cand_cache *cands, *cc, local_cand = {}; | ||||
| 	const struct btf *local_btf = ctx->btf; | ||||
| 	const struct btf_type *local_type; | ||||
| 	const struct btf *main_btf; | ||||
| 	size_t local_essent_len; | ||||
| 	struct btf *mod_btf; | ||||
| 	const char *name; | ||||
| 	int id; | ||||
| 
 | ||||
| 	main_btf = bpf_get_btf_vmlinux(); | ||||
| 	if (IS_ERR(main_btf)) | ||||
| 		return (void *)main_btf; | ||||
| 
 | ||||
| 	local_type = btf_type_by_id(local_btf, local_type_id); | ||||
| 	if (!local_type) | ||||
| 		return ERR_PTR(-EINVAL); | ||||
| 
 | ||||
| 	name = btf_name_by_offset(local_btf, local_type->name_off); | ||||
| 	if (str_is_empty(name)) | ||||
| 		return ERR_PTR(-EINVAL); | ||||
| 	local_essent_len = bpf_core_essential_name_len(name); | ||||
| 
 | ||||
| 	cands = &local_cand; | ||||
| 	cands->name = name; | ||||
| 	cands->kind = btf_kind(local_type); | ||||
| 	cands->name_len = local_essent_len; | ||||
| 
 | ||||
| 	cc = check_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); | ||||
| 	/* cands is a pointer to stack here */ | ||||
| 	if (cc) { | ||||
| 		if (cc->cnt) | ||||
| 			return cc; | ||||
| 		goto check_modules; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Attempt to find target candidates in vmlinux BTF first */ | ||||
| 	cands = bpf_core_add_cands(cands, main_btf, 1); | ||||
| 	if (IS_ERR(cands)) | ||||
| 		return cands; | ||||
| 
 | ||||
| 	/* cands is a pointer to kmalloced memory here if cands->cnt > 0 */ | ||||
| 
 | ||||
| 	/* populate cache even when cands->cnt == 0 */ | ||||
| 	cc = populate_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); | ||||
| 	if (IS_ERR(cc)) | ||||
| 		return cc; | ||||
| 
 | ||||
| 	/* if vmlinux BTF has any candidate, don't go for module BTFs */ | ||||
| 	if (cc->cnt) | ||||
| 		return cc; | ||||
| 
 | ||||
| check_modules: | ||||
| 	/* cands is a pointer to stack here and cands->cnt == 0 */ | ||||
| 	cc = check_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE); | ||||
| 	if (cc) | ||||
| 		/* if cache has it return it even if cc->cnt == 0 */ | ||||
| 		return cc; | ||||
| 
 | ||||
| 	/* If candidate is not found in vmlinux's BTF then search in module's BTFs */ | ||||
| 	spin_lock_bh(&btf_idr_lock); | ||||
| 	idr_for_each_entry(&btf_idr, mod_btf, id) { | ||||
| 		if (!btf_is_module(mod_btf)) | ||||
| 			continue; | ||||
| 		/* linear search could be slow hence unlock/lock
 | ||||
| 		 * the IDR to avoiding holding it for too long | ||||
| 		 */ | ||||
| 		btf_get(mod_btf); | ||||
| 		spin_unlock_bh(&btf_idr_lock); | ||||
| 		cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf)); | ||||
| 		if (IS_ERR(cands)) { | ||||
| 			btf_put(mod_btf); | ||||
| 			return cands; | ||||
| 		} | ||||
| 		spin_lock_bh(&btf_idr_lock); | ||||
| 		btf_put(mod_btf); | ||||
| 	} | ||||
| 	spin_unlock_bh(&btf_idr_lock); | ||||
| 	/* cands is a pointer to kmalloced memory here if cands->cnt > 0
 | ||||
| 	 * or pointer to stack if cands->cnd == 0. | ||||
| 	 * Copy it into the cache even when cands->cnt == 0 and | ||||
| 	 * return the result. | ||||
| 	 */ | ||||
| 	return populate_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE); | ||||
| } | ||||
| 
 | ||||
| int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, | ||||
| 		   int relo_idx, void *insn) | ||||
| { | ||||
| 	return -EOPNOTSUPP; | ||||
| 	bool need_cands = relo->kind != BPF_CORE_TYPE_ID_LOCAL; | ||||
| 	struct bpf_core_cand_list cands = {}; | ||||
| 	int err; | ||||
| 
 | ||||
| 	if (need_cands) { | ||||
| 		struct bpf_cand_cache *cc; | ||||
| 		int i; | ||||
| 
 | ||||
| 		mutex_lock(&cand_cache_mutex); | ||||
| 		cc = bpf_core_find_cands(ctx, relo->type_id); | ||||
| 		if (IS_ERR(cc)) { | ||||
| 			bpf_log(ctx->log, "target candidate search failed for %d\n", | ||||
| 				relo->type_id); | ||||
| 			err = PTR_ERR(cc); | ||||
| 			goto out; | ||||
| 		} | ||||
| 		if (cc->cnt) { | ||||
| 			cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL); | ||||
| 			if (!cands.cands) { | ||||
| 				err = -ENOMEM; | ||||
| 				goto out; | ||||
| 			} | ||||
| 		} | ||||
| 		for (i = 0; i < cc->cnt; i++) { | ||||
| 			bpf_log(ctx->log, | ||||
| 				"CO-RE relocating %s %s: found target candidate [%d]\n", | ||||
| 				btf_kind_str[cc->kind], cc->name, cc->cands[i].id); | ||||
| 			cands.cands[i].btf = cc->cands[i].btf; | ||||
| 			cands.cands[i].id = cc->cands[i].id; | ||||
| 		} | ||||
| 		cands.len = cc->cnt; | ||||
| 		/* cand_cache_mutex needs to span the cache lookup and
 | ||||
| 		 * copy of btf pointer into bpf_core_cand_list, | ||||
| 		 * since module can be unloaded while bpf_core_apply_relo_insn | ||||
| 		 * is working with module's btf. | ||||
| 		 */ | ||||
| 	} | ||||
| 
 | ||||
| 	err = bpf_core_apply_relo_insn((void *)ctx->log, insn, relo->insn_off / 8, | ||||
| 				       relo, relo_idx, ctx->btf, &cands); | ||||
| out: | ||||
| 	if (need_cands) { | ||||
| 		kfree(cands.cands); | ||||
| 		mutex_unlock(&cand_cache_mutex); | ||||
| 		if (ctx->log->level & BPF_LOG_LEVEL2) | ||||
| 			print_cand_cache(ctx->log); | ||||
| 	} | ||||
| 	return err; | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Alexei Starovoitov
						Alexei Starovoitov