mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	bpf: Introduce BPF_PROG_TYPE_SK_REUSEPORT
This patch adds a BPF_PROG_TYPE_SK_REUSEPORT which can select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY. Like other non SK_FILTER/CGROUP_SKB program, it requires CAP_SYS_ADMIN. BPF_PROG_TYPE_SK_REUSEPORT introduces "struct sk_reuseport_kern" to store the bpf context instead of using the skb->cb[48]. At the SO_REUSEPORT sk lookup time, it is in the middle of transiting from a lower layer (ipv4/ipv6) to a upper layer (udp/tcp). At this point, it is not always clear where the bpf context can be appended in the skb->cb[48] to avoid saving-and-restoring cb[]. Even putting aside the difference between ipv4-vs-ipv6 and udp-vs-tcp. It is not clear if the lower layer is only ipv4 and ipv6 in the future and will it not touch the cb[] again before transiting to the upper layer. For example, in udp_gro_receive(), it uses the 48 byte NAPI_GRO_CB instead of IP[6]CB and it may still modify the cb[] after calling the udp[46]_lib_lookup_skb(). Because of the above reason, if sk->cb is used for the bpf ctx, saving-and-restoring is needed and likely the whole 48 bytes cb[] has to be saved and restored. Instead of saving, setting and restoring the cb[], this patch opts to create a new "struct sk_reuseport_kern" and setting the needed values in there. The new BPF_PROG_TYPE_SK_REUSEPORT and "struct sk_reuseport_(kern|md)" will serve all ipv4/ipv6 + udp/tcp combinations. There is no protocol specific usage at this point and it is also inline with the current sock_reuseport.c implementation (i.e. no protocol specific requirement). In "struct sk_reuseport_md", this patch exposes data/data_end/len with semantic similar to other existing usages. Together with "bpf_skb_load_bytes()" and "bpf_skb_load_bytes_relative()", the bpf prog can peek anywhere in the skb. The "bind_inany" tells the bpf prog that the reuseport group is bind-ed to a local INANY address which cannot be learned from skb. The new "bind_inany" is added to "struct sock_reuseport" which will be used when running the new "BPF_PROG_TYPE_SK_REUSEPORT" bpf prog in order to avoid repeating the "bind INANY" test on "sk_v6_rcv_saddr/sk->sk_rcv_saddr" every time a bpf prog is run. It can only be properly initialized when a "sk->sk_reuseport" enabled sk is adding to a hashtable (i.e. during "reuseport_alloc()" and "reuseport_add_sock()"). The new "sk_select_reuseport()" is the main helper that the bpf prog will use to select a SO_REUSEPORT sk. It is the only function that can use the new BPF_MAP_TYPE_REUSEPORT_ARRAY. As mentioned in the earlier patch, the validity of a selected sk is checked in run time in "sk_select_reuseport()". Doing the check in verification time is difficult and inflexible (consider the map-in-map use case). The runtime check is to compare the selected sk's reuseport_id with the reuseport_id that we want. This helper will return -EXXX if the selected sk cannot serve the incoming request (e.g. reuseport_id not match). The bpf prog can decide if it wants to do SK_DROP as its discretion. When the bpf prog returns SK_PASS, the kernel will check if a valid sk has been selected (i.e. "reuse_kern->selected_sk != NULL"). If it does , it will use the selected sk. If not, the kernel will select one from "reuse->socks[]" (as before this patch). The SK_DROP and SK_PASS handling logic will be in the next patch. Signed-off-by: Martin KaFai Lau <kafai@fb.com> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
		
							parent
							
								
									5dc4c4b7d4
								
							
						
					
					
						commit
						2dbb9b9e6d
					
				
					 11 changed files with 365 additions and 13 deletions
				
			
		| 
						 | 
				
			
			@ -29,6 +29,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
 | 
			
		|||
#ifdef CONFIG_BPF_LIRC_MODE2
 | 
			
		||||
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef CONFIG_INET
 | 
			
		||||
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 | 
			
		||||
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -32,6 +32,7 @@ struct seccomp_data;
 | 
			
		|||
struct bpf_prog_aux;
 | 
			
		||||
struct xdp_rxq_info;
 | 
			
		||||
struct xdp_buff;
 | 
			
		||||
struct sock_reuseport;
 | 
			
		||||
 | 
			
		||||
/* ArgX, context and stack frame pointer register positions. Note,
 | 
			
		||||
 * Arg1, Arg2, Arg3, etc are used as argument mappings of function
 | 
			
		||||
| 
						 | 
				
			
			@ -833,6 +834,20 @@ void bpf_warn_invalid_xdp_action(u32 act);
 | 
			
		|||
struct sock *do_sk_redirect_map(struct sk_buff *skb);
 | 
			
		||||
struct sock *do_msg_redirect_map(struct sk_msg_buff *md);
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_INET
 | 
			
		||||
struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
 | 
			
		||||
				  struct bpf_prog *prog, struct sk_buff *skb,
 | 
			
		||||
				  u32 hash);
 | 
			
		||||
#else
 | 
			
		||||
static inline struct sock *
 | 
			
		||||
bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
 | 
			
		||||
		     struct bpf_prog *prog, struct sk_buff *skb,
 | 
			
		||||
		     u32 hash)
 | 
			
		||||
{
 | 
			
		||||
	return NULL;
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_BPF_JIT
 | 
			
		||||
extern int bpf_jit_enable;
 | 
			
		||||
extern int bpf_jit_harden;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -108,6 +108,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
 | 
			
		|||
		    u32 banned_flags);
 | 
			
		||||
bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
 | 
			
		||||
			  bool match_wildcard);
 | 
			
		||||
bool inet_rcv_saddr_any(const struct sock *sk);
 | 
			
		||||
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
 | 
			
		||||
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -21,12 +21,14 @@ struct sock_reuseport {
 | 
			
		|||
	unsigned int		synq_overflow_ts;
 | 
			
		||||
	/* ID stays the same even after the size of socks[] grows. */
 | 
			
		||||
	unsigned int		reuseport_id;
 | 
			
		||||
	bool			bind_inany;
 | 
			
		||||
	struct bpf_prog __rcu	*prog;		/* optional BPF sock selector */
 | 
			
		||||
	struct sock		*socks[0];	/* array of sock pointers */
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
extern int reuseport_alloc(struct sock *sk);
 | 
			
		||||
extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
 | 
			
		||||
extern int reuseport_alloc(struct sock *sk, bool bind_inany);
 | 
			
		||||
extern int reuseport_add_sock(struct sock *sk, struct sock *sk2,
 | 
			
		||||
			      bool bind_inany);
 | 
			
		||||
extern void reuseport_detach_sock(struct sock *sk);
 | 
			
		||||
extern struct sock *reuseport_select_sock(struct sock *sk,
 | 
			
		||||
					  u32 hash,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -151,6 +151,7 @@ enum bpf_prog_type {
 | 
			
		|||
	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 | 
			
		||||
	BPF_PROG_TYPE_LWT_SEG6LOCAL,
 | 
			
		||||
	BPF_PROG_TYPE_LIRC_MODE2,
 | 
			
		||||
	BPF_PROG_TYPE_SK_REUSEPORT,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
enum bpf_attach_type {
 | 
			
		||||
| 
						 | 
				
			
			@ -2114,6 +2115,14 @@ union bpf_attr {
 | 
			
		|||
 *		the shared data.
 | 
			
		||||
 *	Return
 | 
			
		||||
 *		Pointer to the local storage area.
 | 
			
		||||
 *
 | 
			
		||||
 * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
 | 
			
		||||
 *	Description
 | 
			
		||||
 *		Select a SO_REUSEPORT sk from a	BPF_MAP_TYPE_REUSEPORT_ARRAY map
 | 
			
		||||
 *		It checks the selected sk is matching the incoming
 | 
			
		||||
 *		request in the skb.
 | 
			
		||||
 *	Return
 | 
			
		||||
 *		0 on success, or a negative error in case of failure.
 | 
			
		||||
 */
 | 
			
		||||
#define __BPF_FUNC_MAPPER(FN)		\
 | 
			
		||||
	FN(unspec),			\
 | 
			
		||||
| 
						 | 
				
			
			@ -2197,7 +2206,8 @@ union bpf_attr {
 | 
			
		|||
	FN(rc_keydown),			\
 | 
			
		||||
	FN(skb_cgroup_id),		\
 | 
			
		||||
	FN(get_current_cgroup_id),	\
 | 
			
		||||
	FN(get_local_storage),
 | 
			
		||||
	FN(get_local_storage),		\
 | 
			
		||||
	FN(sk_select_reuseport),
 | 
			
		||||
 | 
			
		||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 | 
			
		||||
 * function eBPF program intends to call
 | 
			
		||||
| 
						 | 
				
			
			@ -2414,6 +2424,30 @@ struct sk_msg_md {
 | 
			
		|||
	__u32 local_port;	/* stored in host byte order */
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct sk_reuseport_md {
 | 
			
		||||
	/*
 | 
			
		||||
	 * Start of directly accessible data. It begins from
 | 
			
		||||
	 * the tcp/udp header.
 | 
			
		||||
	 */
 | 
			
		||||
	void *data;
 | 
			
		||||
	void *data_end;		/* End of directly accessible data */
 | 
			
		||||
	/*
 | 
			
		||||
	 * Total length of packet (starting from the tcp/udp header).
 | 
			
		||||
	 * Note that the directly accessible bytes (data_end - data)
 | 
			
		||||
	 * could be less than this "len".  Those bytes could be
 | 
			
		||||
	 * indirectly read by a helper "bpf_skb_load_bytes()".
 | 
			
		||||
	 */
 | 
			
		||||
	__u32 len;
 | 
			
		||||
	/*
 | 
			
		||||
	 * Eth protocol in the mac header (network byte order). e.g.
 | 
			
		||||
	 * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
 | 
			
		||||
	 */
 | 
			
		||||
	__u32 eth_protocol;
 | 
			
		||||
	__u32 ip_protocol;	/* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
 | 
			
		||||
	__u32 bind_inany;	/* Is sock bound to an INANY address? */
 | 
			
		||||
	__u32 hash;		/* A hash of the packet 4 tuples */
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#define BPF_TAG_SIZE	8
 | 
			
		||||
 | 
			
		||||
struct bpf_prog_info {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1310,6 +1310,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 | 
			
		|||
	case BPF_PROG_TYPE_LWT_IN:
 | 
			
		||||
	case BPF_PROG_TYPE_LWT_OUT:
 | 
			
		||||
	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
 | 
			
		||||
	case BPF_PROG_TYPE_SK_REUSEPORT:
 | 
			
		||||
		/* dst_input() and dst_output() can't write for now */
 | 
			
		||||
		if (t == BPF_WRITE)
 | 
			
		||||
			return false;
 | 
			
		||||
| 
						 | 
				
			
			@ -2166,6 +2167,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 | 
			
		|||
		    func_id != BPF_FUNC_msg_redirect_hash)
 | 
			
		||||
			goto error;
 | 
			
		||||
		break;
 | 
			
		||||
	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
 | 
			
		||||
		if (func_id != BPF_FUNC_sk_select_reuseport)
 | 
			
		||||
			goto error;
 | 
			
		||||
		break;
 | 
			
		||||
	default:
 | 
			
		||||
		break;
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -2217,6 +2222,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 | 
			
		|||
		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
 | 
			
		||||
			goto error;
 | 
			
		||||
		break;
 | 
			
		||||
	case BPF_FUNC_sk_select_reuseport:
 | 
			
		||||
		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
 | 
			
		||||
			goto error;
 | 
			
		||||
		break;
 | 
			
		||||
	default:
 | 
			
		||||
		break;
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1462,7 +1462,7 @@ static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
 | 
			
		|||
		return -ENOMEM;
 | 
			
		||||
 | 
			
		||||
	if (sk_unhashed(sk) && sk->sk_reuseport) {
 | 
			
		||||
		err = reuseport_alloc(sk);
 | 
			
		||||
		err = reuseport_alloc(sk, false);
 | 
			
		||||
		if (err)
 | 
			
		||||
			return err;
 | 
			
		||||
	} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
 | 
			
		||||
| 
						 | 
				
			
			@ -7013,3 +7013,270 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
 | 
			
		|||
	release_sock(sk);
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_INET
 | 
			
		||||
struct sk_reuseport_kern {
 | 
			
		||||
	struct sk_buff *skb;
 | 
			
		||||
	struct sock *sk;
 | 
			
		||||
	struct sock *selected_sk;
 | 
			
		||||
	void *data_end;
 | 
			
		||||
	u32 hash;
 | 
			
		||||
	u32 reuseport_id;
 | 
			
		||||
	bool bind_inany;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
 | 
			
		||||
				    struct sock_reuseport *reuse,
 | 
			
		||||
				    struct sock *sk, struct sk_buff *skb,
 | 
			
		||||
				    u32 hash)
 | 
			
		||||
{
 | 
			
		||||
	reuse_kern->skb = skb;
 | 
			
		||||
	reuse_kern->sk = sk;
 | 
			
		||||
	reuse_kern->selected_sk = NULL;
 | 
			
		||||
	reuse_kern->data_end = skb->data + skb_headlen(skb);
 | 
			
		||||
	reuse_kern->hash = hash;
 | 
			
		||||
	reuse_kern->reuseport_id = reuse->reuseport_id;
 | 
			
		||||
	reuse_kern->bind_inany = reuse->bind_inany;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
 | 
			
		||||
				  struct bpf_prog *prog, struct sk_buff *skb,
 | 
			
		||||
				  u32 hash)
 | 
			
		||||
{
 | 
			
		||||
	struct sk_reuseport_kern reuse_kern;
 | 
			
		||||
	enum sk_action action;
 | 
			
		||||
 | 
			
		||||
	bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
 | 
			
		||||
	action = BPF_PROG_RUN(prog, &reuse_kern);
 | 
			
		||||
 | 
			
		||||
	if (action == SK_PASS)
 | 
			
		||||
		return reuse_kern.selected_sk;
 | 
			
		||||
	else
 | 
			
		||||
		return ERR_PTR(-ECONNREFUSED);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
 | 
			
		||||
	   struct bpf_map *, map, void *, key, u32, flags)
 | 
			
		||||
{
 | 
			
		||||
	struct sock_reuseport *reuse;
 | 
			
		||||
	struct sock *selected_sk;
 | 
			
		||||
 | 
			
		||||
	selected_sk = map->ops->map_lookup_elem(map, key);
 | 
			
		||||
	if (!selected_sk)
 | 
			
		||||
		return -ENOENT;
 | 
			
		||||
 | 
			
		||||
	reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
 | 
			
		||||
	if (!reuse)
 | 
			
		||||
		/* selected_sk is unhashed (e.g. by close()) after the
 | 
			
		||||
		 * above map_lookup_elem().  Treat selected_sk has already
 | 
			
		||||
		 * been removed from the map.
 | 
			
		||||
		 */
 | 
			
		||||
		return -ENOENT;
 | 
			
		||||
 | 
			
		||||
	if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
 | 
			
		||||
		struct sock *sk;
 | 
			
		||||
 | 
			
		||||
		if (unlikely(!reuse_kern->reuseport_id))
 | 
			
		||||
			/* There is a small race between adding the
 | 
			
		||||
			 * sk to the map and setting the
 | 
			
		||||
			 * reuse_kern->reuseport_id.
 | 
			
		||||
			 * Treat it as the sk has not been added to
 | 
			
		||||
			 * the bpf map yet.
 | 
			
		||||
			 */
 | 
			
		||||
			return -ENOENT;
 | 
			
		||||
 | 
			
		||||
		sk = reuse_kern->sk;
 | 
			
		||||
		if (sk->sk_protocol != selected_sk->sk_protocol)
 | 
			
		||||
			return -EPROTOTYPE;
 | 
			
		||||
		else if (sk->sk_family != selected_sk->sk_family)
 | 
			
		||||
			return -EAFNOSUPPORT;
 | 
			
		||||
 | 
			
		||||
		/* Catch all. Likely bound to a different sockaddr. */
 | 
			
		||||
		return -EBADFD;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	reuse_kern->selected_sk = selected_sk;
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static const struct bpf_func_proto sk_select_reuseport_proto = {
 | 
			
		||||
	.func           = sk_select_reuseport,
 | 
			
		||||
	.gpl_only       = false,
 | 
			
		||||
	.ret_type       = RET_INTEGER,
 | 
			
		||||
	.arg1_type	= ARG_PTR_TO_CTX,
 | 
			
		||||
	.arg2_type      = ARG_CONST_MAP_PTR,
 | 
			
		||||
	.arg3_type      = ARG_PTR_TO_MAP_KEY,
 | 
			
		||||
	.arg4_type	= ARG_ANYTHING,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
BPF_CALL_4(sk_reuseport_load_bytes,
 | 
			
		||||
	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
 | 
			
		||||
	   void *, to, u32, len)
 | 
			
		||||
{
 | 
			
		||||
	return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
 | 
			
		||||
	.func		= sk_reuseport_load_bytes,
 | 
			
		||||
	.gpl_only	= false,
 | 
			
		||||
	.ret_type	= RET_INTEGER,
 | 
			
		||||
	.arg1_type	= ARG_PTR_TO_CTX,
 | 
			
		||||
	.arg2_type	= ARG_ANYTHING,
 | 
			
		||||
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
 | 
			
		||||
	.arg4_type	= ARG_CONST_SIZE,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
BPF_CALL_5(sk_reuseport_load_bytes_relative,
 | 
			
		||||
	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
 | 
			
		||||
	   void *, to, u32, len, u32, start_header)
 | 
			
		||||
{
 | 
			
		||||
	return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to,
 | 
			
		||||
					       len, start_header);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
 | 
			
		||||
	.func		= sk_reuseport_load_bytes_relative,
 | 
			
		||||
	.gpl_only	= false,
 | 
			
		||||
	.ret_type	= RET_INTEGER,
 | 
			
		||||
	.arg1_type	= ARG_PTR_TO_CTX,
 | 
			
		||||
	.arg2_type	= ARG_ANYTHING,
 | 
			
		||||
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
 | 
			
		||||
	.arg4_type	= ARG_CONST_SIZE,
 | 
			
		||||
	.arg5_type	= ARG_ANYTHING,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static const struct bpf_func_proto *
 | 
			
		||||
sk_reuseport_func_proto(enum bpf_func_id func_id,
 | 
			
		||||
			const struct bpf_prog *prog)
 | 
			
		||||
{
 | 
			
		||||
	switch (func_id) {
 | 
			
		||||
	case BPF_FUNC_sk_select_reuseport:
 | 
			
		||||
		return &sk_select_reuseport_proto;
 | 
			
		||||
	case BPF_FUNC_skb_load_bytes:
 | 
			
		||||
		return &sk_reuseport_load_bytes_proto;
 | 
			
		||||
	case BPF_FUNC_skb_load_bytes_relative:
 | 
			
		||||
		return &sk_reuseport_load_bytes_relative_proto;
 | 
			
		||||
	default:
 | 
			
		||||
		return bpf_base_func_proto(func_id);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool
 | 
			
		||||
sk_reuseport_is_valid_access(int off, int size,
 | 
			
		||||
			     enum bpf_access_type type,
 | 
			
		||||
			     const struct bpf_prog *prog,
 | 
			
		||||
			     struct bpf_insn_access_aux *info)
 | 
			
		||||
{
 | 
			
		||||
	const u32 size_default = sizeof(__u32);
 | 
			
		||||
 | 
			
		||||
	if (off < 0 || off >= sizeof(struct sk_reuseport_md) ||
 | 
			
		||||
	    off % size || type != BPF_READ)
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	switch (off) {
 | 
			
		||||
	case offsetof(struct sk_reuseport_md, data):
 | 
			
		||||
		info->reg_type = PTR_TO_PACKET;
 | 
			
		||||
		return size == sizeof(__u64);
 | 
			
		||||
 | 
			
		||||
	case offsetof(struct sk_reuseport_md, data_end):
 | 
			
		||||
		info->reg_type = PTR_TO_PACKET_END;
 | 
			
		||||
		return size == sizeof(__u64);
 | 
			
		||||
 | 
			
		||||
	case offsetof(struct sk_reuseport_md, hash):
 | 
			
		||||
		return size == size_default;
 | 
			
		||||
 | 
			
		||||
	/* Fields that allow narrowing */
 | 
			
		||||
	case offsetof(struct sk_reuseport_md, eth_protocol):
 | 
			
		||||
		if (size < FIELD_SIZEOF(struct sk_buff, protocol))
 | 
			
		||||
			return false;
 | 
			
		||||
	case offsetof(struct sk_reuseport_md, ip_protocol):
 | 
			
		||||
	case offsetof(struct sk_reuseport_md, bind_inany):
 | 
			
		||||
	case offsetof(struct sk_reuseport_md, len):
 | 
			
		||||
		bpf_ctx_record_field_size(info, size_default);
 | 
			
		||||
		return bpf_ctx_narrow_access_ok(off, size, size_default);
 | 
			
		||||
 | 
			
		||||
	default:
 | 
			
		||||
		return false;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define SK_REUSEPORT_LOAD_FIELD(F) ({					\
 | 
			
		||||
	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
 | 
			
		||||
			      si->dst_reg, si->src_reg,			\
 | 
			
		||||
			      bpf_target_off(struct sk_reuseport_kern, F, \
 | 
			
		||||
					     FIELD_SIZEOF(struct sk_reuseport_kern, F), \
 | 
			
		||||
					     target_size));		\
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
#define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD)				\
 | 
			
		||||
	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,		\
 | 
			
		||||
				    struct sk_buff,			\
 | 
			
		||||
				    skb,				\
 | 
			
		||||
				    SKB_FIELD)
 | 
			
		||||
 | 
			
		||||
#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
 | 
			
		||||
	SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern,	\
 | 
			
		||||
					     struct sock,		\
 | 
			
		||||
					     sk,			\
 | 
			
		||||
					     SK_FIELD, BPF_SIZE, EXTRA_OFF)
 | 
			
		||||
 | 
			
		||||
static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
 | 
			
		||||
					   const struct bpf_insn *si,
 | 
			
		||||
					   struct bpf_insn *insn_buf,
 | 
			
		||||
					   struct bpf_prog *prog,
 | 
			
		||||
					   u32 *target_size)
 | 
			
		||||
{
 | 
			
		||||
	struct bpf_insn *insn = insn_buf;
 | 
			
		||||
 | 
			
		||||
	switch (si->off) {
 | 
			
		||||
	case offsetof(struct sk_reuseport_md, data):
 | 
			
		||||
		SK_REUSEPORT_LOAD_SKB_FIELD(data);
 | 
			
		||||
		break;
 | 
			
		||||
 | 
			
		||||
	case offsetof(struct sk_reuseport_md, len):
 | 
			
		||||
		SK_REUSEPORT_LOAD_SKB_FIELD(len);
 | 
			
		||||
		break;
 | 
			
		||||
 | 
			
		||||
	case offsetof(struct sk_reuseport_md, eth_protocol):
 | 
			
		||||
		SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
 | 
			
		||||
		break;
 | 
			
		||||
 | 
			
		||||
	case offsetof(struct sk_reuseport_md, ip_protocol):
 | 
			
		||||
		BUILD_BUG_ON(hweight_long(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
 | 
			
		||||
		SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
 | 
			
		||||
						    BPF_W, 0);
 | 
			
		||||
		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
 | 
			
		||||
		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
 | 
			
		||||
					SK_FL_PROTO_SHIFT);
 | 
			
		||||
		/* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
 | 
			
		||||
		 * aware.  No further narrowing or masking is needed.
 | 
			
		||||
		 */
 | 
			
		||||
		*target_size = 1;
 | 
			
		||||
		break;
 | 
			
		||||
 | 
			
		||||
	case offsetof(struct sk_reuseport_md, data_end):
 | 
			
		||||
		SK_REUSEPORT_LOAD_FIELD(data_end);
 | 
			
		||||
		break;
 | 
			
		||||
 | 
			
		||||
	case offsetof(struct sk_reuseport_md, hash):
 | 
			
		||||
		SK_REUSEPORT_LOAD_FIELD(hash);
 | 
			
		||||
		break;
 | 
			
		||||
 | 
			
		||||
	case offsetof(struct sk_reuseport_md, bind_inany):
 | 
			
		||||
		SK_REUSEPORT_LOAD_FIELD(bind_inany);
 | 
			
		||||
		break;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return insn - insn_buf;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
 | 
			
		||||
	.get_func_proto		= sk_reuseport_func_proto,
 | 
			
		||||
	.is_valid_access	= sk_reuseport_is_valid_access,
 | 
			
		||||
	.convert_ctx_access	= sk_reuseport_convert_ctx_access,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
const struct bpf_prog_ops sk_reuseport_prog_ops = {
 | 
			
		||||
};
 | 
			
		||||
#endif /* CONFIG_INET */
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -51,7 +51,7 @@ static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
 | 
			
		|||
	return reuse;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int reuseport_alloc(struct sock *sk)
 | 
			
		||||
int reuseport_alloc(struct sock *sk, bool bind_inany)
 | 
			
		||||
{
 | 
			
		||||
	struct sock_reuseport *reuse;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -63,9 +63,17 @@ int reuseport_alloc(struct sock *sk)
 | 
			
		|||
	/* Allocation attempts can occur concurrently via the setsockopt path
 | 
			
		||||
	 * and the bind/hash path.  Nothing to do when we lose the race.
 | 
			
		||||
	 */
 | 
			
		||||
	if (rcu_dereference_protected(sk->sk_reuseport_cb,
 | 
			
		||||
				      lockdep_is_held(&reuseport_lock)))
 | 
			
		||||
	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
 | 
			
		||||
					  lockdep_is_held(&reuseport_lock));
 | 
			
		||||
	if (reuse) {
 | 
			
		||||
		/* Only set reuse->bind_inany if the bind_inany is true.
 | 
			
		||||
		 * Otherwise, it will overwrite the reuse->bind_inany
 | 
			
		||||
		 * which was set by the bind/hash path.
 | 
			
		||||
		 */
 | 
			
		||||
		if (bind_inany)
 | 
			
		||||
			reuse->bind_inany = bind_inany;
 | 
			
		||||
		goto out;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	reuse = __reuseport_alloc(INIT_SOCKS);
 | 
			
		||||
	if (!reuse) {
 | 
			
		||||
| 
						 | 
				
			
			@ -75,6 +83,7 @@ int reuseport_alloc(struct sock *sk)
 | 
			
		|||
 | 
			
		||||
	reuse->socks[0] = sk;
 | 
			
		||||
	reuse->num_socks = 1;
 | 
			
		||||
	reuse->bind_inany = bind_inany;
 | 
			
		||||
	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
 | 
			
		||||
 | 
			
		||||
out:
 | 
			
		||||
| 
						 | 
				
			
			@ -101,6 +110,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
 | 
			
		|||
	more_reuse->num_socks = reuse->num_socks;
 | 
			
		||||
	more_reuse->prog = reuse->prog;
 | 
			
		||||
	more_reuse->reuseport_id = reuse->reuseport_id;
 | 
			
		||||
	more_reuse->bind_inany = reuse->bind_inany;
 | 
			
		||||
 | 
			
		||||
	memcpy(more_reuse->socks, reuse->socks,
 | 
			
		||||
	       reuse->num_socks * sizeof(struct sock *));
 | 
			
		||||
| 
						 | 
				
			
			@ -136,12 +146,12 @@ static void reuseport_free_rcu(struct rcu_head *head)
 | 
			
		|||
 *  @sk2: Socket belonging to the existing reuseport group.
 | 
			
		||||
 *  May return ENOMEM and not add socket to group under memory pressure.
 | 
			
		||||
 */
 | 
			
		||||
int reuseport_add_sock(struct sock *sk, struct sock *sk2)
 | 
			
		||||
int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
 | 
			
		||||
{
 | 
			
		||||
	struct sock_reuseport *old_reuse, *reuse;
 | 
			
		||||
 | 
			
		||||
	if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
 | 
			
		||||
		int err = reuseport_alloc(sk2);
 | 
			
		||||
		int err = reuseport_alloc(sk2, bind_inany);
 | 
			
		||||
 | 
			
		||||
		if (err)
 | 
			
		||||
			return err;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -107,6 +107,15 @@ bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
 | 
			
		|||
}
 | 
			
		||||
EXPORT_SYMBOL(inet_rcv_saddr_equal);
 | 
			
		||||
 | 
			
		||||
bool inet_rcv_saddr_any(const struct sock *sk)
 | 
			
		||||
{
 | 
			
		||||
#if IS_ENABLED(CONFIG_IPV6)
 | 
			
		||||
	if (sk->sk_family == AF_INET6)
 | 
			
		||||
		return ipv6_addr_any(&sk->sk_v6_rcv_saddr);
 | 
			
		||||
#endif
 | 
			
		||||
	return !sk->sk_rcv_saddr;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void inet_get_local_port_range(struct net *net, int *low, int *high)
 | 
			
		||||
{
 | 
			
		||||
	unsigned int seq;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -567,10 +567,11 @@ static int inet_reuseport_add_sock(struct sock *sk,
 | 
			
		|||
		    inet_csk(sk2)->icsk_bind_hash == tb &&
 | 
			
		||||
		    sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
 | 
			
		||||
		    inet_rcv_saddr_equal(sk, sk2, false))
 | 
			
		||||
			return reuseport_add_sock(sk, sk2);
 | 
			
		||||
			return reuseport_add_sock(sk, sk2,
 | 
			
		||||
						  inet_rcv_saddr_any(sk));
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return reuseport_alloc(sk);
 | 
			
		||||
	return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int __inet_hash(struct sock *sk, struct sock *osk)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -221,11 +221,12 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
 | 
			
		|||
		    (sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
 | 
			
		||||
		    sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
 | 
			
		||||
		    inet_rcv_saddr_equal(sk, sk2, false)) {
 | 
			
		||||
			return reuseport_add_sock(sk, sk2);
 | 
			
		||||
			return reuseport_add_sock(sk, sk2,
 | 
			
		||||
						  inet_rcv_saddr_any(sk));
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return reuseport_alloc(sk);
 | 
			
		||||
	return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue