forked from mirrors/linux
		
	bpf: Add new cgroup attach type to enable sock modifications
Add a new cgroup-based program type, BPF_PROG_TYPE_CGROUP_SOCK. As with
BPF_PROG_TYPE_CGROUP_SKB, programs can be attached to a cgroup; they are run
any time a process in the cgroup opens an AF_INET or AF_INET6 socket.
Currently only sk_bound_dev_if is exported to userspace for modification
by a bpf program.
This allows a cgroup to be configured such that AF_INET{6} sockets opened
by processes are automatically bound to a specific device. In turn, this
enables the running of programs that do not support SO_BINDTODEVICE in a
specific VRF context / L3 domain.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
			
			
This commit is contained in:
		
							parent
							
								
									b2cd12574a
								
							
						
					
					
						commit
						6102365876
					
				
					 7 changed files with 138 additions and 2 deletions
				
			
		| 
						 | 
				
			
			@ -40,6 +40,9 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 | 
			
		|||
				struct sk_buff *skb,
 | 
			
		||||
				enum bpf_attach_type type);
 | 
			
		||||
 | 
			
		||||
int __cgroup_bpf_run_filter_sk(struct sock *sk,
 | 
			
		||||
			       enum bpf_attach_type type);
 | 
			
		||||
 | 
			
		||||
/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
 | 
			
		||||
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
 | 
			
		||||
({									      \
 | 
			
		||||
| 
						 | 
				
			
			@ -63,6 +66,16 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 | 
			
		|||
	__ret;								       \
 | 
			
		||||
})
 | 
			
		||||
 | 
			
		||||
/*
 * Wrapper for __cgroup_bpf_run_filter_sk() guarded by cgroup_bpf_enabled.
 *
 * Runs the BPF_CGROUP_INET_SOCK_CREATE program on @sk and evaluates to the
 * value returned by __cgroup_bpf_run_filter_sk(). Evaluates to 0 without
 * calling it when cgroup_bpf_enabled is false or @sk is NULL.
 *
 * The argument is parenthesized so that an expression argument expands with
 * the intended precedence. NOTE: @sk is still expanded more than once, so
 * callers should pass an expression without side effects.
 */
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)				       \
({									       \
	int __ret = 0;							       \
	if (cgroup_bpf_enabled && (sk)) {				       \
		__ret = __cgroup_bpf_run_filter_sk((sk),		       \
						 BPF_CGROUP_INET_SOCK_CREATE); \
	}								       \
	__ret;								       \
})
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
struct cgroup_bpf {};
 | 
			
		||||
| 
						 | 
				
			
			@ -72,6 +85,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
 | 
			
		|||
 | 
			
		||||
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 | 
			
		||||
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
 | 
			
		||||
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
 | 
			
		||||
 | 
			
		||||
#endif /* CONFIG_CGROUP_BPF */
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -101,6 +101,7 @@ enum bpf_prog_type {
 | 
			
		|||
	BPF_PROG_TYPE_XDP,
 | 
			
		||||
	BPF_PROG_TYPE_PERF_EVENT,
 | 
			
		||||
	BPF_PROG_TYPE_CGROUP_SKB,
 | 
			
		||||
	BPF_PROG_TYPE_CGROUP_SOCK,
 | 
			
		||||
	BPF_PROG_TYPE_LWT_IN,
 | 
			
		||||
	BPF_PROG_TYPE_LWT_OUT,
 | 
			
		||||
	BPF_PROG_TYPE_LWT_XMIT,
 | 
			
		||||
| 
						 | 
				
			
			@ -109,6 +110,7 @@ enum bpf_prog_type {
 | 
			
		|||
/*
 * Attach points for cgroup BPF programs, as switched on by
 * bpf_prog_attach() and bpf_prog_detach().
 */
enum bpf_attach_type {
 | 
			
		||||
	BPF_CGROUP_INET_INGRESS,
 | 
			
		||||
	BPF_CGROUP_INET_EGRESS,
 | 
			
		||||
	/* Mapped to BPF_PROG_TYPE_CGROUP_SOCK by bpf_prog_attach(). */
	BPF_CGROUP_INET_SOCK_CREATE,
 | 
			
		||||
	/* NOTE(review): presumably a count sentinel that must stay last - confirm. */
	__MAX_BPF_ATTACH_TYPE
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -567,6 +569,10 @@ enum bpf_ret_code {
 | 
			
		|||
	/* >127 are reserved for prog type specific return codes */
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 * Context structure seen by BPF_PROG_TYPE_CGROUP_SOCK programs.
 * Accesses to bound_dev_if are rewritten to the sk_bound_dev_if field of
 * struct sock by sock_filter_convert_ctx_access(); it is the only field
 * for which sock_filter_is_valid_access() permits writes.
 */
struct bpf_sock {
 | 
			
		||||
	__u32 bound_dev_if;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/* User return codes for XDP prog type.
 | 
			
		||||
 * A valid XDP program must return one of these defined values. All other
 | 
			
		||||
 * return codes are reserved for future use. Unknown return codes will result
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -165,3 +165,36 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 | 
			
		|||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * __cgroup_bpf_run_filter_sk() - Run a program on a sock
 | 
			
		||||
 * @sk: sock structure to manipulate
 | 
			
		||||
 * @type: The type of program to be executed
 | 
			
		||||
 *
 | 
			
		||||
 * The socket passed in is expected to be of type INET or INET6.
 | 
			
		||||
 *
 | 
			
		||||
 * The program type passed in via @type must be suitable for sock
 | 
			
		||||
 * filtering. No further check is performed to assert that.
 | 
			
		||||
 *
 | 
			
		||||
 * This function will return %-EPERM if an attached program was found
 | 
			
		||||
 * and if it returned != 1 during execution. In all other cases, 0 is returned.
 | 
			
		||||
 */
 | 
			
		||||
int __cgroup_bpf_run_filter_sk(struct sock *sk,
 | 
			
		||||
			       enum bpf_attach_type type)
 | 
			
		||||
{
 | 
			
		||||
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 | 
			
		||||
	struct bpf_prog *prog;
 | 
			
		||||
	int ret = 0;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
 | 
			
		||||
	prog = rcu_dereference(cgrp->bpf.effective[type]);
 | 
			
		||||
	if (prog)
 | 
			
		||||
		ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM;
 | 
			
		||||
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -869,7 +869,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 | 
			
		|||
	case BPF_CGROUP_INET_EGRESS:
 | 
			
		||||
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
 | 
			
		||||
		break;
 | 
			
		||||
 | 
			
		||||
	case BPF_CGROUP_INET_SOCK_CREATE:
 | 
			
		||||
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
 | 
			
		||||
		break;
 | 
			
		||||
	default:
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -905,6 +907,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 | 
			
		|||
	switch (attr->attach_type) {
 | 
			
		||||
	case BPF_CGROUP_INET_INGRESS:
 | 
			
		||||
	case BPF_CGROUP_INET_EGRESS:
 | 
			
		||||
	case BPF_CGROUP_INET_SOCK_CREATE:
 | 
			
		||||
		cgrp = cgroup_get_from_fd(attr->target_fd);
 | 
			
		||||
		if (IS_ERR(cgrp))
 | 
			
		||||
			return PTR_ERR(cgrp);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2818,6 +2818,32 @@ static bool lwt_is_valid_access(int off, int size,
 | 
			
		|||
	return __is_valid_access(off, size, type);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool sock_filter_is_valid_access(int off, int size,
 | 
			
		||||
					enum bpf_access_type type,
 | 
			
		||||
					enum bpf_reg_type *reg_type)
 | 
			
		||||
{
 | 
			
		||||
	if (type == BPF_WRITE) {
 | 
			
		||||
		switch (off) {
 | 
			
		||||
		case offsetof(struct bpf_sock, bound_dev_if):
 | 
			
		||||
			break;
 | 
			
		||||
		default:
 | 
			
		||||
			return false;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (off < 0 || off + size > sizeof(struct bpf_sock))
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	/* The verifier guarantees that size > 0. */
 | 
			
		||||
	if (off % size != 0)
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	if (size != sizeof(__u32))
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
 | 
			
		||||
			       const struct bpf_prog *prog)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -3076,6 +3102,30 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 | 
			
		|||
	return insn - insn_buf;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
 | 
			
		||||
					  int dst_reg, int src_reg,
 | 
			
		||||
					  int ctx_off,
 | 
			
		||||
					  struct bpf_insn *insn_buf,
 | 
			
		||||
					  struct bpf_prog *prog)
 | 
			
		||||
{
 | 
			
		||||
	struct bpf_insn *insn = insn_buf;
 | 
			
		||||
 | 
			
		||||
	switch (ctx_off) {
 | 
			
		||||
	case offsetof(struct bpf_sock, bound_dev_if):
 | 
			
		||||
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
 | 
			
		||||
 | 
			
		||||
		if (type == BPF_WRITE)
 | 
			
		||||
			*insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
 | 
			
		||||
					offsetof(struct sock, sk_bound_dev_if));
 | 
			
		||||
		else
 | 
			
		||||
			*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
 | 
			
		||||
				      offsetof(struct sock, sk_bound_dev_if));
 | 
			
		||||
		break;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return insn - insn_buf;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 | 
			
		||||
					 int src_reg, int ctx_off,
 | 
			
		||||
					 struct bpf_insn *insn_buf,
 | 
			
		||||
| 
						 | 
				
			
			@ -3162,6 +3212,12 @@ static const struct bpf_verifier_ops lwt_xmit_ops = {
 | 
			
		|||
	.gen_prologue		= tc_cls_act_prologue,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 * Verifier callbacks for BPF_PROG_TYPE_CGROUP_SOCK programs (bound to that
 * program type through cg_sock_type). Helper lookup reuses
 * sk_filter_func_proto; context access checking and rewriting use the
 * sock_filter_* callbacks.
 */
static const struct bpf_verifier_ops cg_sock_ops = {
 | 
			
		||||
	.get_func_proto		= sk_filter_func_proto,
 | 
			
		||||
	.is_valid_access	= sock_filter_is_valid_access,
 | 
			
		||||
	.convert_ctx_access	= sock_filter_convert_ctx_access,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
 | 
			
		||||
	.ops	= &sk_filter_ops,
 | 
			
		||||
	.type	= BPF_PROG_TYPE_SOCKET_FILTER,
 | 
			
		||||
| 
						 | 
				
			
			@ -3202,6 +3258,11 @@ static struct bpf_prog_type_list lwt_xmit_type __read_mostly = {
 | 
			
		|||
	.type	= BPF_PROG_TYPE_LWT_XMIT,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 * Binds BPF_PROG_TYPE_CGROUP_SOCK to cg_sock_ops; registered from
 * register_sk_filter_ops(). The trailing comma on the last initializer
 * matches the sibling *_type entries.
 */
static struct bpf_prog_type_list cg_sock_type __read_mostly = {
	.ops	= &cg_sock_ops,
	.type	= BPF_PROG_TYPE_CGROUP_SOCK,
};
 | 
			
		||||
 | 
			
		||||
static int __init register_sk_filter_ops(void)
 | 
			
		||||
{
 | 
			
		||||
	bpf_register_prog_type(&sk_filter_type);
 | 
			
		||||
| 
						 | 
				
			
			@ -3209,6 +3270,7 @@ static int __init register_sk_filter_ops(void)
 | 
			
		|||
	bpf_register_prog_type(&sched_act_type);
 | 
			
		||||
	bpf_register_prog_type(&xdp_type);
 | 
			
		||||
	bpf_register_prog_type(&cg_skb_type);
 | 
			
		||||
	bpf_register_prog_type(&cg_sock_type);
 | 
			
		||||
	bpf_register_prog_type(&lwt_in_type);
 | 
			
		||||
	bpf_register_prog_type(&lwt_out_type);
 | 
			
		||||
	bpf_register_prog_type(&lwt_xmit_type);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -374,8 +374,18 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
 | 
			
		|||
 | 
			
		||||
	if (sk->sk_prot->init) {
 | 
			
		||||
		err = sk->sk_prot->init(sk);
 | 
			
		||||
		if (err)
 | 
			
		||||
		if (err) {
 | 
			
		||||
			sk_common_release(sk);
 | 
			
		||||
			goto out;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (!kern) {
 | 
			
		||||
		err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
 | 
			
		||||
		if (err) {
 | 
			
		||||
			sk_common_release(sk);
 | 
			
		||||
			goto out;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
out:
 | 
			
		||||
	return err;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -258,6 +258,14 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
 | 
			
		|||
			goto out;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (!kern) {
 | 
			
		||||
		err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
 | 
			
		||||
		if (err) {
 | 
			
		||||
			sk_common_release(sk);
 | 
			
		||||
			goto out;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
out:
 | 
			
		||||
	return err;
 | 
			
		||||
out_rcu_unlock:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue