forked from mirrors/linux
		
	net: Allow iterating sockmap and sockhash

Add bpf_iter support for sockmap / sockhash, based on the bpf_sk_storage and
hashtable implementation. sockmap and sockhash share the same iteration
context: a pointer to an arbitrary key and a pointer to a socket. Both
pointers may be NULL, and so BPF has to perform a NULL check before accessing
them. Technically it's not possible for sockhash iteration to yield a NULL
socket, but we ignore this to be able to use a single iteration point.

Iteration will visit all keys that remain unmodified during the lifetime of
the iterator. It may or may not visit newly added ones.

Switch from using rcu_dereference_raw to plain rcu_dereference, so we gain
another guard rail if CONFIG_PROVE_RCU is enabled.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200909162712.221874-3-lmb@cloudflare.com
This commit is contained in:
		
							parent
							
								
									654785a1af
								
							
						
					
					
						commit
						0365351524
					
				
					 1 changed files with 278 additions and 2 deletions
				
			
		|  | @ -2,6 +2,7 @@ | ||||||
| /* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */ | /* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */ | ||||||
| 
 | 
 | ||||||
| #include <linux/bpf.h> | #include <linux/bpf.h> | ||||||
|  | #include <linux/btf_ids.h> | ||||||
| #include <linux/filter.h> | #include <linux/filter.h> | ||||||
| #include <linux/errno.h> | #include <linux/errno.h> | ||||||
| #include <linux/file.h> | #include <linux/file.h> | ||||||
|  | @ -703,6 +704,109 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = { | ||||||
| 	.arg4_type      = ARG_ANYTHING, | 	.arg4_type      = ARG_ANYTHING, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | struct sock_map_seq_info { /* seq_file private state for iterating a sockmap */ | ||||||
|  | 	struct bpf_map *map; /* map being walked; set by sock_map_init_seq_private() */ | ||||||
|  | 	struct sock *sk; /* result of the latest lookup at index; may be NULL */ | ||||||
|  | 	u32 index; /* slot currently visited; bumped by sock_map_seq_next() */ | ||||||
|  | }; | ||||||
|  | /* Context passed to the BPF program by bpf_iter_run_prog() for both
 * sockmap and sockhash iteration. key and sk are left NULL on the extra
 * call the stop handlers make with v == NULL; for sockmap, sk may also be
 * NULL for an individual slot.
 */
 | ||||||
|  | struct bpf_iter__sockmap { | ||||||
|  | 	__bpf_md_ptr(struct bpf_iter_meta *, meta); | ||||||
|  | 	__bpf_md_ptr(struct bpf_map *, map); | ||||||
|  | 	__bpf_md_ptr(void *, key); | ||||||
|  | 	__bpf_md_ptr(struct sock *, sk); | ||||||
|  | }; | ||||||
|  | /* Arguments listed here mirror the fields of struct bpf_iter__sockmap.
 * NOTE(review): DEFINE_BPF_ITER_FUNC is defined outside this file;
 * presumably it declares the entry point for the "sockmap" iterator
 * target - confirm what it expands to.
 */
 | ||||||
|  | DEFINE_BPF_ITER_FUNC(sockmap, struct bpf_iter_meta *meta, | ||||||
|  | 		     struct bpf_map *map, void *key, | ||||||
|  | 		     struct sock *sk) | ||||||
|  | /* Refresh info->sk with whatever is stored at slot info->index.
 *
 * Returns NULL once info->index is at or past map->max_entries, otherwise
 * info itself - even when the slot holds no socket.
 */
 | ||||||
|  | static void *sock_map_seq_lookup_elem(struct sock_map_seq_info *info) | ||||||
|  | { | ||||||
|  | 	if (unlikely(info->index >= info->map->max_entries)) | ||||||
|  | 		return NULL; | ||||||
|  | 
 | ||||||
|  | 	info->sk = __sock_map_lookup_elem(info->map, info->index); | ||||||
|  | 
 | ||||||
|  | 	/* can't return sk directly, since that might be NULL */ | ||||||
|  | 	return info; | ||||||
|  | } | ||||||
|  | /* seq_file start handler for sockmap iteration.
 *
 * Takes the RCU read lock (dropped in sock_map_seq_stop()) and looks up the
 * slot named by info->index. The lookup is driven by info->index rather
 * than *pos, so a repeated start continues at whatever slot info->index
 * currently names.
 */
 | ||||||
|  | static void *sock_map_seq_start(struct seq_file *seq, loff_t *pos) | ||||||
|  | { | ||||||
|  | 	struct sock_map_seq_info *info = seq->private; | ||||||
|  | 	/* NOTE(review): position 0 is bumped to 1 before the first element
	 * is produced; the reason is not visible here - presumably a
	 * convention shared with the other bpf map iterators. Confirm.
	 */
 | ||||||
|  | 	if (*pos == 0) | ||||||
|  | 		++*pos; | ||||||
|  | 
 | ||||||
|  | 	/* pairs with sock_map_seq_stop */ | ||||||
|  | 	rcu_read_lock(); | ||||||
|  | 	return sock_map_seq_lookup_elem(info); | ||||||
|  | } | ||||||
|  | /* seq_file next handler: advance both the seq_file position and
 * info->index by one, then look up the new slot. v is not used.
 * Returns what sock_map_seq_lookup_elem() returns (NULL past the last slot).
 */
 | ||||||
|  | static void *sock_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||||||
|  | { | ||||||
|  | 	struct sock_map_seq_info *info = seq->private; | ||||||
|  | 
 | ||||||
|  | 	++*pos; | ||||||
|  | 	++info->index; | ||||||
|  | 
 | ||||||
|  | 	return sock_map_seq_lookup_elem(info); | ||||||
|  | } | ||||||
|  | /* seq_file show handler: run the attached BPF program for one step.
 *
 * v is non-NULL for a regular slot and NULL for the extra call issued from
 * sock_map_seq_stop(). Returns 0 when bpf_iter_get_info() yields no
 * program, otherwise the result of bpf_iter_run_prog().
 */
 | ||||||
|  | static int sock_map_seq_show(struct seq_file *seq, void *v) | ||||||
|  | { | ||||||
|  | 	struct sock_map_seq_info *info = seq->private; | ||||||
|  | 	struct bpf_iter__sockmap ctx = {}; | ||||||
|  | 	struct bpf_iter_meta meta; | ||||||
|  | 	struct bpf_prog *prog; | ||||||
|  | 	/* Only meta.seq is assigned in this function. NOTE(review): the
	 * remaining meta fields are presumably filled in by
	 * bpf_iter_get_info() - confirm.
	 */
 | ||||||
|  | 	meta.seq = seq; | ||||||
|  | 	prog = bpf_iter_get_info(&meta, !v); | ||||||
|  | 	if (!prog) | ||||||
|  | 		return 0; | ||||||
|  | 	/* ctx was zero-initialised, so key and sk stay NULL when v is NULL */
 | ||||||
|  | 	ctx.meta = &meta; | ||||||
|  | 	ctx.map = info->map; | ||||||
|  | 	if (v) { | ||||||
|  | 		ctx.key = &info->index; | ||||||
|  | 		ctx.sk = info->sk; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return bpf_iter_run_prog(prog, &ctx); | ||||||
|  | } | ||||||
|  | /* seq_file stop handler for sockmap iteration.
 *
 * When v is NULL, the show handler is invoked once more without an element,
 * so an attached program sees key == NULL and sk == NULL; its return value
 * is deliberately discarded. The RCU read lock taken in
 * sock_map_seq_start() is always dropped.
 */
 | ||||||
|  | static void sock_map_seq_stop(struct seq_file *seq, void *v) | ||||||
|  | { | ||||||
|  | 	if (!v) | ||||||
|  | 		(void)sock_map_seq_show(seq, NULL); | ||||||
|  | 
 | ||||||
|  | 	/* pairs with sock_map_seq_start */ | ||||||
|  | 	rcu_read_unlock(); | ||||||
|  | } | ||||||
|  | /* seq_file callbacks for sockmap iteration; referenced from
 * sock_map_iter_seq_info.
 */
 | ||||||
|  | static const struct seq_operations sock_map_seq_ops = { | ||||||
|  | 	.start	= sock_map_seq_start, | ||||||
|  | 	.next	= sock_map_seq_next, | ||||||
|  | 	.stop	= sock_map_seq_stop, | ||||||
|  | 	.show	= sock_map_seq_show, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | /* init_seq_private callback for the sockmap iterator. | ||||||
|  |  * | ||||||
|  |  * Records the target map in the seq_file private state and takes its own | ||||||
|  |  * uref on it. The reference obtained in sock_map_iter_attach_target() | ||||||
|  |  * belongs to the link and is dropped in sock_map_iter_detach_target(); an | ||||||
|  |  * iterator fd can outlive that link, so without a reference of its own | ||||||
|  |  * info->map could point at a freed map. Always returns 0. | ||||||
|  |  */ | ||||||
|  | static int sock_map_init_seq_private(void *priv_data, | ||||||
|  | 				     struct bpf_iter_aux_info *aux) | ||||||
|  | { | ||||||
|  | 	struct sock_map_seq_info *info = priv_data; | ||||||
|  | 
 | ||||||
|  | 	bpf_map_inc_with_uref(aux->map); | ||||||
|  | 	info->map = aux->map; | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* fini_seq_private callback: drop the uref taken in | ||||||
|  |  * sock_map_init_seq_private(). | ||||||
|  |  */ | ||||||
|  | static void sock_map_fini_seq_private(void *priv_data) | ||||||
|  | { | ||||||
|  | 	struct sock_map_seq_info *info = priv_data; | ||||||
|  | 
 | ||||||
|  | 	bpf_map_put_with_uref(info->map); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* seq_file description of the sockmap iterator; hooked up through | ||||||
|  |  * sock_map_ops.iter_seq_info. | ||||||
|  |  */ | ||||||
|  | static const struct bpf_iter_seq_info sock_map_iter_seq_info = { | ||||||
|  | 	.seq_ops		= &sock_map_seq_ops, | ||||||
|  | 	.init_seq_private	= sock_map_init_seq_private, | ||||||
|  | 	.fini_seq_private	= sock_map_fini_seq_private, | ||||||
|  | 	.seq_priv_size		= sizeof(struct sock_map_seq_info), | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| static int sock_map_btf_id; | static int sock_map_btf_id; | ||||||
| const struct bpf_map_ops sock_map_ops = { | const struct bpf_map_ops sock_map_ops = { | ||||||
| 	.map_meta_equal		= bpf_map_meta_equal, | 	.map_meta_equal		= bpf_map_meta_equal, | ||||||
|  | @ -717,6 +821,7 @@ const struct bpf_map_ops sock_map_ops = { | ||||||
| 	.map_check_btf		= map_check_no_btf, | 	.map_check_btf		= map_check_no_btf, | ||||||
| 	.map_btf_name		= "bpf_stab", | 	.map_btf_name		= "bpf_stab", | ||||||
| 	.map_btf_id		= &sock_map_btf_id, | 	.map_btf_id		= &sock_map_btf_id, | ||||||
|  | 	.iter_seq_info		= &sock_map_iter_seq_info, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct bpf_shtab_elem { | struct bpf_shtab_elem { | ||||||
|  | @ -953,7 +1058,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key, | ||||||
| 	if (!elem) | 	if (!elem) | ||||||
| 		goto find_first_elem; | 		goto find_first_elem; | ||||||
| 
 | 
 | ||||||
| 	elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)), | 	elem_next = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&elem->node)), | ||||||
| 				     struct bpf_shtab_elem, node); | 				     struct bpf_shtab_elem, node); | ||||||
| 	if (elem_next) { | 	if (elem_next) { | ||||||
| 		memcpy(key_next, elem_next->key, key_size); | 		memcpy(key_next, elem_next->key, key_size); | ||||||
|  | @ -965,7 +1070,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key, | ||||||
| find_first_elem: | find_first_elem: | ||||||
| 	for (; i < htab->buckets_num; i++) { | 	for (; i < htab->buckets_num; i++) { | ||||||
| 		head = &sock_hash_select_bucket(htab, i)->head; | 		head = &sock_hash_select_bucket(htab, i)->head; | ||||||
| 		elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), | 		elem_next = hlist_entry_safe(rcu_dereference(hlist_first_rcu(head)), | ||||||
| 					     struct bpf_shtab_elem, node); | 					     struct bpf_shtab_elem, node); | ||||||
| 		if (elem_next) { | 		if (elem_next) { | ||||||
| 			memcpy(key_next, elem_next->key, key_size); | 			memcpy(key_next, elem_next->key, key_size); | ||||||
|  | @ -1199,6 +1304,117 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = { | ||||||
| 	.arg4_type      = ARG_ANYTHING, | 	.arg4_type      = ARG_ANYTHING, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | struct sock_hash_seq_info { /* seq_file private state for iterating a sockhash */ | ||||||
|  | 	struct bpf_map *map; /* map handed to the BPF program as ctx.map */ | ||||||
|  | 	struct bpf_shtab *htab; /* container_of(map), set by sock_hash_init_seq_private() */ | ||||||
|  | 	u32 bucket_id; /* bucket currently walked; advanced by sock_hash_seq_find_next() */ | ||||||
|  | }; | ||||||
|  | /* Return the element that follows prev_elem, or - when prev_elem is NULL -
 * the first element found at or after bucket info->bucket_id.
 *
 * info->bucket_id is advanced as buckets are exhausted. Returns NULL when
 * no bucket below htab->buckets_num has an element left. The list pointers
 * are read with rcu_dereference(), so the caller is expected to be inside
 * the RCU read-side section opened by sock_hash_seq_start().
 */
 | ||||||
|  | static void *sock_hash_seq_find_next(struct sock_hash_seq_info *info, | ||||||
|  | 				     struct bpf_shtab_elem *prev_elem) | ||||||
|  | { | ||||||
|  | 	const struct bpf_shtab *htab = info->htab; | ||||||
|  | 	struct bpf_shtab_bucket *bucket; | ||||||
|  | 	struct bpf_shtab_elem *elem; | ||||||
|  | 	struct hlist_node *node; | ||||||
|  | 
 | ||||||
|  | 	/* try to find next elem in the same bucket */ | ||||||
|  | 	if (prev_elem) { | ||||||
|  | 		node = rcu_dereference(hlist_next_rcu(&prev_elem->node)); | ||||||
|  | 		elem = hlist_entry_safe(node, struct bpf_shtab_elem, node); | ||||||
|  | 		if (elem) | ||||||
|  | 			return elem; | ||||||
|  | 
 | ||||||
|  | 		/* no more elements, continue in the next bucket */ | ||||||
|  | 		info->bucket_id++; | ||||||
|  | 	} | ||||||
|  | 	/* scan forward for the first non-empty bucket */
 | ||||||
|  | 	for (; info->bucket_id < htab->buckets_num; info->bucket_id++) { | ||||||
|  | 		bucket = &htab->buckets[info->bucket_id]; | ||||||
|  | 		node = rcu_dereference(hlist_first_rcu(&bucket->head)); | ||||||
|  | 		elem = hlist_entry_safe(node, struct bpf_shtab_elem, node); | ||||||
|  | 		if (elem) | ||||||
|  | 			return elem; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return NULL; | ||||||
|  | } | ||||||
|  | /* seq_file start handler for sockhash iteration.
 *
 * Takes the RCU read lock (dropped in sock_hash_seq_stop()) and returns the
 * first element at or after bucket info->bucket_id. Because prev_elem is
 * passed as NULL, a repeated start begins again at the head of that bucket.
 */
 | ||||||
|  | static void *sock_hash_seq_start(struct seq_file *seq, loff_t *pos) | ||||||
|  | { | ||||||
|  | 	struct sock_hash_seq_info *info = seq->private; | ||||||
|  | 	/* NOTE(review): position 0 is bumped to 1 before the first element
	 * is produced; the reason is not visible here - presumably a
	 * convention shared with the other bpf map iterators. Confirm.
	 */
 | ||||||
|  | 	if (*pos == 0) | ||||||
|  | 		++*pos; | ||||||
|  | 
 | ||||||
|  | 	/* pairs with sock_hash_seq_stop */ | ||||||
|  | 	rcu_read_lock(); | ||||||
|  | 	return sock_hash_seq_find_next(info, NULL); | ||||||
|  | } | ||||||
|  | /* seq_file next handler: bump the seq_file position and return the element
 * following v (the previously returned bpf_shtab_elem), or NULL when the
 * table is exhausted.
 */
 | ||||||
|  | static void *sock_hash_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||||||
|  | { | ||||||
|  | 	struct sock_hash_seq_info *info = seq->private; | ||||||
|  | 
 | ||||||
|  | 	++*pos; | ||||||
|  | 	return sock_hash_seq_find_next(info, v); | ||||||
|  | } | ||||||
|  | /* seq_file show handler: run the attached BPF program for one element.
 *
 * v is the current bpf_shtab_elem, or NULL for the extra call issued from
 * sock_hash_seq_stop(). Returns 0 when bpf_iter_get_info() yields no
 * program, otherwise the result of bpf_iter_run_prog().
 */
 | ||||||
|  | static int sock_hash_seq_show(struct seq_file *seq, void *v) | ||||||
|  | { | ||||||
|  | 	struct sock_hash_seq_info *info = seq->private; | ||||||
|  | 	struct bpf_iter__sockmap ctx = {}; | ||||||
|  | 	struct bpf_shtab_elem *elem = v; | ||||||
|  | 	struct bpf_iter_meta meta; | ||||||
|  | 	struct bpf_prog *prog; | ||||||
|  | 	/* Only meta.seq is assigned in this function. NOTE(review): the
	 * remaining meta fields are presumably filled in by
	 * bpf_iter_get_info() - confirm.
	 */
 | ||||||
|  | 	meta.seq = seq; | ||||||
|  | 	prog = bpf_iter_get_info(&meta, !elem); | ||||||
|  | 	if (!prog) | ||||||
|  | 		return 0; | ||||||
|  | 	/* ctx was zero-initialised, so key and sk stay NULL when elem is NULL */
 | ||||||
|  | 	ctx.meta = &meta; | ||||||
|  | 	ctx.map = info->map; | ||||||
|  | 	if (elem) { | ||||||
|  | 		ctx.key = elem->key; | ||||||
|  | 		ctx.sk = elem->sk; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return bpf_iter_run_prog(prog, &ctx); | ||||||
|  | } | ||||||
|  | /* seq_file stop handler for sockhash iteration.
 *
 * When v is NULL, the show handler is invoked once more without an element,
 * so an attached program sees key == NULL and sk == NULL; its return value
 * is deliberately discarded. The RCU read lock taken in
 * sock_hash_seq_start() is always dropped.
 */
 | ||||||
|  | static void sock_hash_seq_stop(struct seq_file *seq, void *v) | ||||||
|  | { | ||||||
|  | 	if (!v) | ||||||
|  | 		(void)sock_hash_seq_show(seq, NULL); | ||||||
|  | 
 | ||||||
|  | 	/* pairs with sock_hash_seq_start */ | ||||||
|  | 	rcu_read_unlock(); | ||||||
|  | } | ||||||
|  | /* seq_file callbacks for sockhash iteration; referenced from
 * sock_hash_iter_seq_info.
 */
 | ||||||
|  | static const struct seq_operations sock_hash_seq_ops = { | ||||||
|  | 	.start	= sock_hash_seq_start, | ||||||
|  | 	.next	= sock_hash_seq_next, | ||||||
|  | 	.stop	= sock_hash_seq_stop, | ||||||
|  | 	.show	= sock_hash_seq_show, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | /* init_seq_private callback for the sockhash iterator. | ||||||
|  |  * | ||||||
|  |  * Records the target map and its enclosing bpf_shtab in the seq_file | ||||||
|  |  * private state and takes its own uref on the map. The reference obtained | ||||||
|  |  * in sock_map_iter_attach_target() belongs to the link and is dropped in | ||||||
|  |  * sock_map_iter_detach_target(); an iterator fd can outlive that link, so | ||||||
|  |  * without a reference of its own info->map / info->htab could point at a | ||||||
|  |  * freed map. Always returns 0. | ||||||
|  |  */ | ||||||
|  | static int sock_hash_init_seq_private(void *priv_data, | ||||||
|  | 				     struct bpf_iter_aux_info *aux) | ||||||
|  | { | ||||||
|  | 	struct sock_hash_seq_info *info = priv_data; | ||||||
|  | 
 | ||||||
|  | 	bpf_map_inc_with_uref(aux->map); | ||||||
|  | 	info->map = aux->map; | ||||||
|  | 	info->htab = container_of(aux->map, struct bpf_shtab, map); | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* fini_seq_private callback: drop the uref taken in | ||||||
|  |  * sock_hash_init_seq_private(). | ||||||
|  |  */ | ||||||
|  | static void sock_hash_fini_seq_private(void *priv_data) | ||||||
|  | { | ||||||
|  | 	struct sock_hash_seq_info *info = priv_data; | ||||||
|  | 
 | ||||||
|  | 	bpf_map_put_with_uref(info->map); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* seq_file description of the sockhash iterator; hooked up through | ||||||
|  |  * sock_hash_ops.iter_seq_info. | ||||||
|  |  */ | ||||||
|  | static const struct bpf_iter_seq_info sock_hash_iter_seq_info = { | ||||||
|  | 	.seq_ops		= &sock_hash_seq_ops, | ||||||
|  | 	.init_seq_private	= sock_hash_init_seq_private, | ||||||
|  | 	.fini_seq_private	= sock_hash_fini_seq_private, | ||||||
|  | 	.seq_priv_size		= sizeof(struct sock_hash_seq_info), | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| static int sock_hash_map_btf_id; | static int sock_hash_map_btf_id; | ||||||
| const struct bpf_map_ops sock_hash_ops = { | const struct bpf_map_ops sock_hash_ops = { | ||||||
| 	.map_meta_equal		= bpf_map_meta_equal, | 	.map_meta_equal		= bpf_map_meta_equal, | ||||||
|  | @ -1213,6 +1429,7 @@ const struct bpf_map_ops sock_hash_ops = { | ||||||
| 	.map_check_btf		= map_check_no_btf, | 	.map_check_btf		= map_check_no_btf, | ||||||
| 	.map_btf_name		= "bpf_shtab", | 	.map_btf_name		= "bpf_shtab", | ||||||
| 	.map_btf_id		= &sock_hash_map_btf_id, | 	.map_btf_id		= &sock_hash_map_btf_id, | ||||||
|  | 	.iter_seq_info		= &sock_hash_iter_seq_info, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) | static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) | ||||||
|  | @ -1323,3 +1540,62 @@ void sock_map_close(struct sock *sk, long timeout) | ||||||
| 	release_sock(sk); | 	release_sock(sk); | ||||||
| 	saved_close(sk, timeout); | 	saved_close(sk, timeout); | ||||||
| } | } | ||||||
|  | /* attach_target callback: bind an iterator link to the map named by
 * linfo->map.map_fd.
 *
 * On success returns 0 and leaves in aux->map the reference obtained from
 * bpf_map_get_with_uref(); sock_map_iter_detach_target() releases it.
 * Errors: -EBADF if map_fd is 0, the error encoded by
 * bpf_map_get_with_uref(), -EINVAL if the map is neither SOCKMAP nor
 * SOCKHASH, -EACCES if prog->aux->max_rdonly_access exceeds the key size.
 * The reference is dropped again on the two latter error paths.
 */
 | ||||||
|  | static int sock_map_iter_attach_target(struct bpf_prog *prog, | ||||||
|  | 				       union bpf_iter_link_info *linfo, | ||||||
|  | 				       struct bpf_iter_aux_info *aux) | ||||||
|  | { | ||||||
|  | 	struct bpf_map *map; | ||||||
|  | 	int err = -EINVAL; | ||||||
|  | 
 | ||||||
|  | 	if (!linfo->map.map_fd) | ||||||
|  | 		return -EBADF; | ||||||
|  | 
 | ||||||
|  | 	map = bpf_map_get_with_uref(linfo->map.map_fd); | ||||||
|  | 	if (IS_ERR(map)) | ||||||
|  | 		return PTR_ERR(map); | ||||||
|  | 	/* err is still -EINVAL here: wrong map type */
 | ||||||
|  | 	if (map->map_type != BPF_MAP_TYPE_SOCKMAP && | ||||||
|  | 	    map->map_type != BPF_MAP_TYPE_SOCKHASH) | ||||||
|  | 		goto put_map; | ||||||
|  | 	/* The key is exposed to the program as a read-only buffer (see
	 * ctx_arg_info in sock_map_iter_reg). NOTE(review): max_rdonly_access
	 * is presumably the furthest such access the program makes - confirm.
	 */
 | ||||||
|  | 	if (prog->aux->max_rdonly_access > map->key_size) { | ||||||
|  | 		err = -EACCES; | ||||||
|  | 		goto put_map; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	aux->map = map; | ||||||
|  | 	return 0; | ||||||
|  | 
 | ||||||
|  | put_map: | ||||||
|  | 	bpf_map_put_with_uref(map); | ||||||
|  | 	return err; | ||||||
|  | } | ||||||
|  | /* detach_target callback: release the map reference that
 * sock_map_iter_attach_target() stored in aux->map.
 */
 | ||||||
|  | static void sock_map_iter_detach_target(struct bpf_iter_aux_info *aux) | ||||||
|  | { | ||||||
|  | 	bpf_map_put_with_uref(aux->map); | ||||||
|  | } | ||||||
|  | /* Registration record for the "sockmap" iterator target, shared by sockmap
 * and sockhash. Two context arguments are described: key as a nullable
 * read-only buffer and sk as a nullable BTF-typed pointer. The btf_id of
 * the sk entry is not set here; bpf_sockmap_iter_init() fills it in before
 * registering, which is why this object is not const.
 */
 | ||||||
|  | static struct bpf_iter_reg sock_map_iter_reg = { | ||||||
|  | 	.target			= "sockmap", | ||||||
|  | 	.attach_target		= sock_map_iter_attach_target, | ||||||
|  | 	.detach_target		= sock_map_iter_detach_target, | ||||||
|  | 	.show_fdinfo		= bpf_iter_map_show_fdinfo, | ||||||
|  | 	.fill_link_info		= bpf_iter_map_fill_link_info, | ||||||
|  | 	.ctx_arg_info_size	= 2, | ||||||
|  | 	.ctx_arg_info		= { | ||||||
|  | 		{ offsetof(struct bpf_iter__sockmap, key), | ||||||
|  | 		  PTR_TO_RDONLY_BUF_OR_NULL }, | ||||||
|  | 		{ offsetof(struct bpf_iter__sockmap, sk), | ||||||
|  | 		  PTR_TO_BTF_ID_OR_NULL }, | ||||||
|  | 	}, | ||||||
|  | }; | ||||||
|  | /* Late initcall: set the BTF id of the sk context argument
 * (ctx_arg_info[1]) to btf_sock_ids[BTF_SOCK_TYPE_SOCK], then register the
 * "sockmap" iterator target. Returns the result of bpf_iter_reg_target().
 */
 | ||||||
|  | static int __init bpf_sockmap_iter_init(void) | ||||||
|  | { | ||||||
|  | 	sock_map_iter_reg.ctx_arg_info[1].btf_id = | ||||||
|  | 		btf_sock_ids[BTF_SOCK_TYPE_SOCK]; | ||||||
|  | 	return bpf_iter_reg_target(&sock_map_iter_reg); | ||||||
|  | } | ||||||
|  | late_initcall(bpf_sockmap_iter_init); | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Lorenz Bauer
						Lorenz Bauer