net: Allow iterating sockmap and sockhash

Add bpf_iter support for sockmap / sockhash, based on the bpf_sk_storage and
hashtable implementations. sockmap and sockhash share the same iteration
context: a pointer to an arbitrary key and a pointer to a socket. Both
pointers may be NULL, so a BPF program has to perform a NULL check before
accessing them. Technically it's not possible for sockhash iteration to yield
a NULL socket, but we ignore this to be able to use a single iteration point.

Iteration will visit all keys that remain unmodified during the lifetime of
the iterator. It may or may not visit newly added ones.

Switch from rcu_dereference_raw to plain rcu_dereference, so we gain another
guard rail if CONFIG_PROVE_RCU is enabled.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200909162712.221874-3-lmb@cloudflare.com
parent 654785a1af
commit 0365351524

1 changed file with 278 additions and 2 deletions
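To make the iteration contract concrete, here is a minimal, selftest-style sketch of a BPF program that could consume this iterator. It is not part of the patch: the program and variable names are illustrative, and it assumes a BTF-generated vmlinux.h that contains the struct bpf_iter__sockmap context added below. The key point from the commit message is visible in the code: both ctx->key and ctx->sk must be NULL-checked before use.

// SPDX-License-Identifier: GPL-2.0
/* Illustrative consumer of the sockmap/sockhash iterator: count visited
 * entries. Assumes vmlinux.h exposes struct bpf_iter__sockmap.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

__u32 elems;	/* number of keys visited, read back from user space */
__u32 socks;	/* number of non-NULL sockets seen */

SEC("iter/sockmap")
int count_sockmap(struct bpf_iter__sockmap *ctx)
{
	struct sock *sk = ctx->sk;
	void *key = ctx->key;

	/* Both pointers may be NULL, e.g. on the final call that marks
	 * the end of iteration, so check them before using them.
	 */
	if (key)
		elems++;
	if (sk)
		socks++;

	return 0;
}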
				
			
@@ -2,6 +2,7 @@
 /* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
 
 #include <linux/bpf.h>
+#include <linux/btf_ids.h>
 #include <linux/filter.h>
 #include <linux/errno.h>
 #include <linux/file.h>
@@ -703,6 +704,109 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = {
 	.arg4_type      = ARG_ANYTHING,
 };
 
+struct sock_map_seq_info {
+	struct bpf_map *map;
+	struct sock *sk;
+	u32 index;
+};
+
+struct bpf_iter__sockmap {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct bpf_map *, map);
+	__bpf_md_ptr(void *, key);
+	__bpf_md_ptr(struct sock *, sk);
+};
+
+DEFINE_BPF_ITER_FUNC(sockmap, struct bpf_iter_meta *meta,
+		     struct bpf_map *map, void *key,
+		     struct sock *sk)
+
+static void *sock_map_seq_lookup_elem(struct sock_map_seq_info *info)
+{
+	if (unlikely(info->index >= info->map->max_entries))
+		return NULL;
+
+	info->sk = __sock_map_lookup_elem(info->map, info->index);
+
+	/* can't return sk directly, since that might be NULL */
+	return info;
+}
+
+static void *sock_map_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct sock_map_seq_info *info = seq->private;
+
+	if (*pos == 0)
+		++*pos;
+
+	/* pairs with sock_map_seq_stop */
+	rcu_read_lock();
+	return sock_map_seq_lookup_elem(info);
+}
+
+static void *sock_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct sock_map_seq_info *info = seq->private;
+
+	++*pos;
+	++info->index;
+
+	return sock_map_seq_lookup_elem(info);
+}
+
+static int sock_map_seq_show(struct seq_file *seq, void *v)
+{
+	struct sock_map_seq_info *info = seq->private;
+	struct bpf_iter__sockmap ctx = {};
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, !v);
+	if (!prog)
+		return 0;
+
+	ctx.meta = &meta;
+	ctx.map = info->map;
+	if (v) {
+		ctx.key = &info->index;
+		ctx.sk = info->sk;
+	}
+
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static void sock_map_seq_stop(struct seq_file *seq, void *v)
+{
+	if (!v)
+		(void)sock_map_seq_show(seq, NULL);
+
+	/* pairs with sock_map_seq_start */
+	rcu_read_unlock();
+}
+
+static const struct seq_operations sock_map_seq_ops = {
+	.start	= sock_map_seq_start,
+	.next	= sock_map_seq_next,
+	.stop	= sock_map_seq_stop,
+	.show	= sock_map_seq_show,
+};
+
+static int sock_map_init_seq_private(void *priv_data,
+				     struct bpf_iter_aux_info *aux)
+{
+	struct sock_map_seq_info *info = priv_data;
+
+	info->map = aux->map;
+	return 0;
+}
+
+static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
+	.seq_ops		= &sock_map_seq_ops,
+	.init_seq_private	= sock_map_init_seq_private,
+	.seq_priv_size		= sizeof(struct sock_map_seq_info),
+};
+
 static int sock_map_btf_id;
 const struct bpf_map_ops sock_map_ops = {
 	.map_meta_equal		= bpf_map_meta_equal,
@@ -717,6 +821,7 @@ const struct bpf_map_ops sock_map_ops = {
 	.map_check_btf		= map_check_no_btf,
 	.map_btf_name		= "bpf_stab",
 	.map_btf_id		= &sock_map_btf_id,
+	.iter_seq_info		= &sock_map_iter_seq_info,
 };
 
 struct bpf_shtab_elem {
@@ -953,7 +1058,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key,
 	if (!elem)
 		goto find_first_elem;
 
-	elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)),
+	elem_next = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&elem->node)),
 				     struct bpf_shtab_elem, node);
 	if (elem_next) {
 		memcpy(key_next, elem_next->key, key_size);
@@ -965,7 +1070,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key,
 find_first_elem:
 	for (; i < htab->buckets_num; i++) {
 		head = &sock_hash_select_bucket(htab, i)->head;
-		elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+		elem_next = hlist_entry_safe(rcu_dereference(hlist_first_rcu(head)),
 					     struct bpf_shtab_elem, node);
 		if (elem_next) {
 			memcpy(key_next, elem_next->key, key_size);
@@ -1199,6 +1304,117 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
 	.arg4_type      = ARG_ANYTHING,
 };
 
+struct sock_hash_seq_info {
+	struct bpf_map *map;
+	struct bpf_shtab *htab;
+	u32 bucket_id;
+};
+
+static void *sock_hash_seq_find_next(struct sock_hash_seq_info *info,
+				     struct bpf_shtab_elem *prev_elem)
+{
+	const struct bpf_shtab *htab = info->htab;
+	struct bpf_shtab_bucket *bucket;
+	struct bpf_shtab_elem *elem;
+	struct hlist_node *node;
+
+	/* try to find next elem in the same bucket */
+	if (prev_elem) {
+		node = rcu_dereference(hlist_next_rcu(&prev_elem->node));
+		elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
+		if (elem)
+			return elem;
+
+		/* no more elements, continue in the next bucket */
+		info->bucket_id++;
+	}
+
+	for (; info->bucket_id < htab->buckets_num; info->bucket_id++) {
+		bucket = &htab->buckets[info->bucket_id];
+		node = rcu_dereference(hlist_first_rcu(&bucket->head));
+		elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
+		if (elem)
+			return elem;
+	}
+
+	return NULL;
+}
+
+static void *sock_hash_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct sock_hash_seq_info *info = seq->private;
+
+	if (*pos == 0)
+		++*pos;
+
+	/* pairs with sock_hash_seq_stop */
+	rcu_read_lock();
+	return sock_hash_seq_find_next(info, NULL);
+}
+
+static void *sock_hash_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct sock_hash_seq_info *info = seq->private;
+
+	++*pos;
+	return sock_hash_seq_find_next(info, v);
+}
+
+static int sock_hash_seq_show(struct seq_file *seq, void *v)
+{
+	struct sock_hash_seq_info *info = seq->private;
+	struct bpf_iter__sockmap ctx = {};
+	struct bpf_shtab_elem *elem = v;
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, !elem);
+	if (!prog)
+		return 0;
+
+	ctx.meta = &meta;
+	ctx.map = info->map;
+	if (elem) {
+		ctx.key = elem->key;
+		ctx.sk = elem->sk;
+	}
+
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static void sock_hash_seq_stop(struct seq_file *seq, void *v)
+{
+	if (!v)
+		(void)sock_hash_seq_show(seq, NULL);
+
+	/* pairs with sock_hash_seq_start */
+	rcu_read_unlock();
+}
+
+static const struct seq_operations sock_hash_seq_ops = {
+	.start	= sock_hash_seq_start,
+	.next	= sock_hash_seq_next,
+	.stop	= sock_hash_seq_stop,
+	.show	= sock_hash_seq_show,
+};
+
+static int sock_hash_init_seq_private(void *priv_data,
+				     struct bpf_iter_aux_info *aux)
+{
+	struct sock_hash_seq_info *info = priv_data;
+
+	info->map = aux->map;
+	info->htab = container_of(aux->map, struct bpf_shtab, map);
+	return 0;
+}
+
+static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
+	.seq_ops		= &sock_hash_seq_ops,
+	.init_seq_private	= sock_hash_init_seq_private,
+	.seq_priv_size		= sizeof(struct sock_hash_seq_info),
+};
+
 static int sock_hash_map_btf_id;
 const struct bpf_map_ops sock_hash_ops = {
 	.map_meta_equal		= bpf_map_meta_equal,
@@ -1213,6 +1429,7 @@ const struct bpf_map_ops sock_hash_ops = {
 	.map_check_btf		= map_check_no_btf,
 	.map_btf_name		= "bpf_shtab",
 	.map_btf_id		= &sock_hash_map_btf_id,
+	.iter_seq_info		= &sock_hash_iter_seq_info,
 };
 
 static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
@@ -1323,3 +1540,62 @@ void sock_map_close(struct sock *sk, long timeout)
 	release_sock(sk);
 	saved_close(sk, timeout);
 }
+
+static int sock_map_iter_attach_target(struct bpf_prog *prog,
+				       union bpf_iter_link_info *linfo,
+				       struct bpf_iter_aux_info *aux)
+{
+	struct bpf_map *map;
+	int err = -EINVAL;
+
+	if (!linfo->map.map_fd)
+		return -EBADF;
+
+	map = bpf_map_get_with_uref(linfo->map.map_fd);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	if (map->map_type != BPF_MAP_TYPE_SOCKMAP &&
+	    map->map_type != BPF_MAP_TYPE_SOCKHASH)
+		goto put_map;
+
+	if (prog->aux->max_rdonly_access > map->key_size) {
+		err = -EACCES;
+		goto put_map;
+	}
+
+	aux->map = map;
+	return 0;
+
+put_map:
+	bpf_map_put_with_uref(map);
+	return err;
+}
+
+static void sock_map_iter_detach_target(struct bpf_iter_aux_info *aux)
+{
+	bpf_map_put_with_uref(aux->map);
+}
+
+static struct bpf_iter_reg sock_map_iter_reg = {
+	.target			= "sockmap",
+	.attach_target		= sock_map_iter_attach_target,
+	.detach_target		= sock_map_iter_detach_target,
+	.show_fdinfo		= bpf_iter_map_show_fdinfo,
+	.fill_link_info		= bpf_iter_map_fill_link_info,
+	.ctx_arg_info_size	= 2,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__sockmap, key),
+		  PTR_TO_RDONLY_BUF_OR_NULL },
+		{ offsetof(struct bpf_iter__sockmap, sk),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
+};
+
+static int __init bpf_sockmap_iter_init(void)
+{
+	sock_map_iter_reg.ctx_arg_info[1].btf_id =
+		btf_sock_ids[BTF_SOCK_TYPE_SOCK];
+	return bpf_iter_reg_target(&sock_map_iter_reg);
+}
+late_initcall(bpf_sockmap_iter_init);
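For completeness, a hedged sketch of how user space might attach such a program with libbpf and drive the iteration by reading from the iterator fd. Only the link_info.map.map_fd plumbing corresponds to what sock_map_iter_attach_target() above consumes; the object handling and the program name (count_sockmap, matching the earlier sketch) are assumptions for illustration.

// SPDX-License-Identifier: GPL-2.0
/* Hypothetical loader: attach an "iter/sockmap" program to an existing
 * sockmap/sockhash fd and walk the map once. Names are illustrative.
 */
#include <errno.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

int run_sockmap_iter(struct bpf_object *obj, int sockmap_fd)
{
	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
	union bpf_iter_link_info linfo = {};
	struct bpf_program *prog;
	struct bpf_link *link;
	char buf[64];
	int iter_fd, err = 0;

	prog = bpf_object__find_program_by_name(obj, "count_sockmap");
	if (!prog)
		return -ENOENT;

	/* This map fd reaches sock_map_iter_attach_target() above as
	 * linfo->map.map_fd.
	 */
	linfo.map.map_fd = sockmap_fd;
	opts.link_info = &linfo;
	opts.link_info_len = sizeof(linfo);

	link = bpf_program__attach_iter(prog, &opts);
	if (libbpf_get_error(link))
		return -1;

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	if (iter_fd < 0) {
		err = iter_fd;
		goto out;
	}

	/* Each read() advances the seq_file iteration: the BPF program runs
	 * once per element and once more with NULL key/sk at the end.
	 */
	while (read(iter_fd, buf, sizeof(buf)) > 0)
		;

	close(iter_fd);
out:
	bpf_link__destroy(link);
	return err;
}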