forked from mirrors/linux
		
	bpf: implement getsockopt and setsockopt hooks
Implement new BPF_PROG_TYPE_CGROUP_SOCKOPT program type and
BPF_CGROUP_{G,S}ETSOCKOPT cgroup hooks.
BPF_CGROUP_SETSOCKOPT can modify user setsockopt arguments before
passing them down to the kernel or bypass kernel completely.
BPF_CGROUP_GETSOCKOPT can inspect/modify getsockopt arguments that
kernel returns.
Both hooks reuse existing PTR_TO_PACKET{,_END} infrastructure.
The buffer memory is pre-allocated (because I don't think there is
a precedent for working with __user memory from bpf). This might be
slow to do for each {s,g}etsockopt call, that's why I've added
__cgroup_bpf_prog_array_is_empty that exits early if there is nothing
attached to a cgroup. Note, however, that there is a race between
__cgroup_bpf_prog_array_is_empty and BPF_PROG_RUN_ARRAY where cgroup
program layout might have changed; this should not be a problem
because in general there is a race between multiple calls to
{s,g}etsockopt and user adding/removing bpf progs from a cgroup.
The return code of the BPF program is handled as follows:
* 0: EPERM
* 1: success, continue with next BPF program in the cgroup chain
v9:
* allow overwriting setsockopt arguments (Alexei Starovoitov):
  * use set_fs (same as kernel_setsockopt)
  * buffer is always kzalloc'd (no small on-stack buffer)
v8:
* use s32 for optlen (Andrii Nakryiko)
v7:
* return only 0 or 1 (Alexei Starovoitov)
* always run all progs (Alexei Starovoitov)
* use optval=0 as kernel bypass in setsockopt (Alexei Starovoitov)
  (decided to use optval=-1 instead, optval=0 might be a valid input)
* call getsockopt hook after kernel handlers (Alexei Starovoitov)
v6:
* rework cgroup chaining; stop as soon as bpf program returns
  0 or 2; see patch with the documentation for the details
* drop Andrii's and Martin's Acked-by (not sure they are comfortable
  with the new state of things)
v5:
* skip copy_to_user() and put_user() when ret == 0 (Martin Lau)
v4:
* don't export bpf_sk_fullsock helper (Martin Lau)
* size != sizeof(__u64) for uapi pointers (Martin Lau)
* offsetof instead of bpf_ctx_range when checking ctx access (Martin Lau)
v3:
* typos in BPF_PROG_CGROUP_SOCKOPT_RUN_ARRAY comments (Andrii Nakryiko)
* reverse christmas tree in BPF_PROG_CGROUP_SOCKOPT_RUN_ARRAY (Andrii
  Nakryiko)
* use __bpf_md_ptr instead of __u32 for optval{,_end} (Martin Lau)
* use BPF_FIELD_SIZEOF() for consistency (Martin Lau)
* new CG_SOCKOPT_ACCESS macro to wrap repeated parts
v2:
* moved bpf_sockopt_kern fields around to remove a hole (Martin Lau)
* aligned bpf_sockopt_kern->buf to 8 bytes (Martin Lau)
* bpf_prog_array_is_empty instead of bpf_prog_array_length (Martin Lau)
* added [0,2] return code check to verifier (Martin Lau)
* dropped unused buf[64] from the stack (Martin Lau)
* use PTR_TO_SOCKET for bpf_sockopt->sk (Martin Lau)
* dropped bpf_target_off from ctx rewrites (Martin Lau)
* use return code for kernel bypass (Martin Lau & Andrii Nakryiko)
Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
			
			
This commit is contained in:
		
							parent
							
								
									3b1c667e47
								
							
						
					
					
						commit
						0d01da6afc
					
				
					 11 changed files with 472 additions and 1 deletions
				
			
		|  | @ -124,6 +124,14 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, | |||
| 				   loff_t *ppos, void **new_buf, | ||||
| 				   enum bpf_attach_type type); | ||||
| 
 | ||||
| int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, | ||||
| 				       int *optname, char __user *optval, | ||||
| 				       int *optlen, char **kernel_optval); | ||||
| int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, | ||||
| 				       int optname, char __user *optval, | ||||
| 				       int __user *optlen, int max_optlen, | ||||
| 				       int retval); | ||||
| 
 | ||||
| static inline enum bpf_cgroup_storage_type cgroup_storage_type( | ||||
| 	struct bpf_map *map) | ||||
| { | ||||
|  | @ -286,6 +294,38 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, | |||
| 	__ret;								       \ | ||||
| }) | ||||
| 
 | ||||
/* Invoke the cgroup setsockopt hook. Evaluates to 0, without touching any
 * of the arguments, when cgroup BPF is not enabled.
 */
#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen,   \
				       kernel_optval)			       \
({									       \
	int __ret = cgroup_bpf_enabled ?				       \
		__cgroup_bpf_run_filter_setsockopt(sock, level, optname,      \
						   optval, optlen,	       \
						   kernel_optval) :	       \
		0;							       \
	__ret;								       \
})
| 
 | ||||
/* Read the user-supplied *optlen so it can later be used as the upper bound
 * for the getsockopt hook's buffer.
 *
 * Evaluates to 0 when cgroup BPF is not enabled. A failing get_user() is
 * handled explicitly and also yields 0, instead of relying on whatever
 * get_user() left in the destination variable.
 */
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen)			       \
({									       \
	int __ret = 0;							       \
	if (cgroup_bpf_enabled && get_user(__ret, optlen))		       \
		__ret = 0;						       \
	__ret;								       \
})
| 
 | ||||
/* Invoke the cgroup getsockopt hook after the kernel handler has run.
 *
 * retval is the kernel handler's result; it is evaluated exactly once and
 * passed through unchanged when cgroup BPF is not enabled. Otherwise the
 * macro evaluates to whatever __cgroup_bpf_run_filter_getsockopt() returns.
 */
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen,   \
				       max_optlen, retval)		       \
({									       \
	int __ret = (retval);						       \
	if (cgroup_bpf_enabled)						       \
		__ret = __cgroup_bpf_run_filter_getsockopt(sock, level,	       \
							   optname, optval,    \
							   optlen, max_optlen, \
							   __ret);	       \
	__ret;								       \
})
| 
 | ||||
| int cgroup_bpf_prog_attach(const union bpf_attr *attr, | ||||
| 			   enum bpf_prog_type ptype, struct bpf_prog *prog); | ||||
| int cgroup_bpf_prog_detach(const union bpf_attr *attr, | ||||
|  | @ -357,6 +397,11 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, | |||
| #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) | ||||
| #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) | ||||
| #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; }) | ||||
/* No-op variants of the sockopt hooks: setsockopt and the max-optlen probe
 * evaluate to 0, getsockopt passes the kernel's retval through untouched.
 */
#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
				       kernel_optval) ({ 0; })
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
				       optlen, max_optlen, retval) ({ retval; })
| 
 | ||||
| #define for_each_cgroup_storage_type(stype) for (; false; ) | ||||
| 
 | ||||
|  |  | |||
|  | @ -518,6 +518,7 @@ struct bpf_prog_array { | |||
| struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); | ||||
| void bpf_prog_array_free(struct bpf_prog_array *progs); | ||||
| int bpf_prog_array_length(struct bpf_prog_array *progs); | ||||
| bool bpf_prog_array_is_empty(struct bpf_prog_array *array); | ||||
| int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs, | ||||
| 				__u32 __user *prog_ids, u32 cnt); | ||||
| 
 | ||||
|  | @ -1051,6 +1052,7 @@ extern const struct bpf_func_proto bpf_spin_unlock_proto; | |||
| extern const struct bpf_func_proto bpf_get_local_storage_proto; | ||||
| extern const struct bpf_func_proto bpf_strtol_proto; | ||||
| extern const struct bpf_func_proto bpf_strtoul_proto; | ||||
| extern const struct bpf_func_proto bpf_tcp_sock_proto; | ||||
| 
 | ||||
| /* Shared helpers among cBPF and eBPF. */ | ||||
| void bpf_user_rnd_init_once(void); | ||||
|  |  | |||
|  | @ -30,6 +30,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable) | |||
| #ifdef CONFIG_CGROUP_BPF | ||||
| BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) | ||||
| BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl) | ||||
| BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt) | ||||
| #endif | ||||
| #ifdef CONFIG_BPF_LIRC_MODE2 | ||||
| BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) | ||||
|  |  | |||
|  | @ -1199,4 +1199,14 @@ struct bpf_sysctl_kern { | |||
| 	u64 tmp_reg; | ||||
| }; | ||||
| 
 | ||||
| struct bpf_sockopt_kern { | ||||
| 	struct sock	*sk; | ||||
| 	u8		*optval; | ||||
| 	u8		*optval_end; | ||||
| 	s32		level; | ||||
| 	s32		optname; | ||||
| 	s32		optlen; | ||||
| 	s32		retval; | ||||
| }; | ||||
| 
 | ||||
| #endif /* __LINUX_FILTER_H__ */ | ||||
|  |  | |||
|  | @ -170,6 +170,7 @@ enum bpf_prog_type { | |||
| 	BPF_PROG_TYPE_FLOW_DISSECTOR, | ||||
| 	BPF_PROG_TYPE_CGROUP_SYSCTL, | ||||
| 	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, | ||||
| 	BPF_PROG_TYPE_CGROUP_SOCKOPT, | ||||
| }; | ||||
| 
 | ||||
| enum bpf_attach_type { | ||||
|  | @ -194,6 +195,8 @@ enum bpf_attach_type { | |||
| 	BPF_CGROUP_SYSCTL, | ||||
| 	BPF_CGROUP_UDP4_RECVMSG, | ||||
| 	BPF_CGROUP_UDP6_RECVMSG, | ||||
| 	BPF_CGROUP_GETSOCKOPT, | ||||
| 	BPF_CGROUP_SETSOCKOPT, | ||||
| 	__MAX_BPF_ATTACH_TYPE | ||||
| }; | ||||
| 
 | ||||
|  | @ -3541,4 +3544,15 @@ struct bpf_sysctl { | |||
| 				 */ | ||||
| }; | ||||
| 
 | ||||
| struct bpf_sockopt { | ||||
| 	__bpf_md_ptr(struct bpf_sock *, sk); | ||||
| 	__bpf_md_ptr(void *, optval); | ||||
| 	__bpf_md_ptr(void *, optval_end); | ||||
| 
 | ||||
| 	__s32	level; | ||||
| 	__s32	optname; | ||||
| 	__s32	optlen; | ||||
| 	__s32	retval; | ||||
| }; | ||||
| 
 | ||||
| #endif /* _UAPI__LINUX_BPF_H__ */ | ||||
|  |  | |||
|  | @ -15,6 +15,7 @@ | |||
| #include <linux/bpf.h> | ||||
| #include <linux/bpf-cgroup.h> | ||||
| #include <net/sock.h> | ||||
| #include <net/bpf_sk_storage.h> | ||||
| 
 | ||||
| #include "../cgroup/cgroup-internal.h" | ||||
| 
 | ||||
|  | @ -938,6 +939,188 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, | |||
| } | ||||
| EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl); | ||||
| 
 | ||||
| static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp, | ||||
| 					     enum bpf_attach_type attach_type) | ||||
| { | ||||
| 	struct bpf_prog_array *prog_array; | ||||
| 	bool empty; | ||||
| 
 | ||||
| 	rcu_read_lock(); | ||||
| 	prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]); | ||||
| 	empty = bpf_prog_array_is_empty(prog_array); | ||||
| 	rcu_read_unlock(); | ||||
| 
 | ||||
| 	return empty; | ||||
| } | ||||
| 
 | ||||
| static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen) | ||||
| { | ||||
| 	if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	ctx->optval = kzalloc(max_optlen, GFP_USER); | ||||
| 	if (!ctx->optval) | ||||
| 		return -ENOMEM; | ||||
| 
 | ||||
| 	ctx->optval_end = ctx->optval + max_optlen; | ||||
| 	ctx->optlen = max_optlen; | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static void sockopt_free_buf(struct bpf_sockopt_kern *ctx) | ||||
| { | ||||
| 	kfree(ctx->optval); | ||||
| } | ||||
| 
 | ||||
| int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, | ||||
| 				       int *optname, char __user *optval, | ||||
| 				       int *optlen, char **kernel_optval) | ||||
| { | ||||
| 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); | ||||
| 	struct bpf_sockopt_kern ctx = { | ||||
| 		.sk = sk, | ||||
| 		.level = *level, | ||||
| 		.optname = *optname, | ||||
| 	}; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	/* Opportunistic check to see whether we have any BPF program
 | ||||
| 	 * attached to the hook so we don't waste time allocating | ||||
| 	 * memory and locking the socket. | ||||
| 	 */ | ||||
| 	if (!cgroup_bpf_enabled || | ||||
| 	    __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	ret = sockopt_alloc_buf(&ctx, *optlen); | ||||
| 	if (ret) | ||||
| 		return ret; | ||||
| 
 | ||||
| 	if (copy_from_user(ctx.optval, optval, *optlen) != 0) { | ||||
| 		ret = -EFAULT; | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	lock_sock(sk); | ||||
| 	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT], | ||||
| 				 &ctx, BPF_PROG_RUN); | ||||
| 	release_sock(sk); | ||||
| 
 | ||||
| 	if (!ret) { | ||||
| 		ret = -EPERM; | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	if (ctx.optlen == -1) { | ||||
| 		/* optlen set to -1, bypass kernel */ | ||||
| 		ret = 1; | ||||
| 	} else if (ctx.optlen > *optlen || ctx.optlen < -1) { | ||||
| 		/* optlen is out of bounds */ | ||||
| 		ret = -EFAULT; | ||||
| 	} else { | ||||
| 		/* optlen within bounds, run kernel handler */ | ||||
| 		ret = 0; | ||||
| 
 | ||||
| 		/* export any potential modifications */ | ||||
| 		*level = ctx.level; | ||||
| 		*optname = ctx.optname; | ||||
| 		*optlen = ctx.optlen; | ||||
| 		*kernel_optval = ctx.optval; | ||||
| 	} | ||||
| 
 | ||||
| out: | ||||
| 	if (ret) | ||||
| 		sockopt_free_buf(&ctx); | ||||
| 	return ret; | ||||
| } | ||||
| EXPORT_SYMBOL(__cgroup_bpf_run_filter_setsockopt); | ||||
| 
 | ||||
| int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, | ||||
| 				       int optname, char __user *optval, | ||||
| 				       int __user *optlen, int max_optlen, | ||||
| 				       int retval) | ||||
| { | ||||
| 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); | ||||
| 	struct bpf_sockopt_kern ctx = { | ||||
| 		.sk = sk, | ||||
| 		.level = level, | ||||
| 		.optname = optname, | ||||
| 		.retval = retval, | ||||
| 	}; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	/* Opportunistic check to see whether we have any BPF program
 | ||||
| 	 * attached to the hook so we don't waste time allocating | ||||
| 	 * memory and locking the socket. | ||||
| 	 */ | ||||
| 	if (!cgroup_bpf_enabled || | ||||
| 	    __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT)) | ||||
| 		return retval; | ||||
| 
 | ||||
| 	ret = sockopt_alloc_buf(&ctx, max_optlen); | ||||
| 	if (ret) | ||||
| 		return ret; | ||||
| 
 | ||||
| 	if (!retval) { | ||||
| 		/* If kernel getsockopt finished successfully,
 | ||||
| 		 * copy whatever was returned to the user back | ||||
| 		 * into our temporary buffer. Set optlen to the | ||||
| 		 * one that kernel returned as well to let | ||||
| 		 * BPF programs inspect the value. | ||||
| 		 */ | ||||
| 
 | ||||
| 		if (get_user(ctx.optlen, optlen)) { | ||||
| 			ret = -EFAULT; | ||||
| 			goto out; | ||||
| 		} | ||||
| 
 | ||||
| 		if (ctx.optlen > max_optlen) | ||||
| 			ctx.optlen = max_optlen; | ||||
| 
 | ||||
| 		if (copy_from_user(ctx.optval, optval, ctx.optlen) != 0) { | ||||
| 			ret = -EFAULT; | ||||
| 			goto out; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	lock_sock(sk); | ||||
| 	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT], | ||||
| 				 &ctx, BPF_PROG_RUN); | ||||
| 	release_sock(sk); | ||||
| 
 | ||||
| 	if (!ret) { | ||||
| 		ret = -EPERM; | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	if (ctx.optlen > max_optlen) { | ||||
| 		ret = -EFAULT; | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	/* BPF programs only allowed to set retval to 0, not some
 | ||||
| 	 * arbitrary value. | ||||
| 	 */ | ||||
| 	if (ctx.retval != 0 && ctx.retval != retval) { | ||||
| 		ret = -EFAULT; | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	if (copy_to_user(optval, ctx.optval, ctx.optlen) || | ||||
| 	    put_user(ctx.optlen, optlen)) { | ||||
| 		ret = -EFAULT; | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	ret = ctx.retval; | ||||
| 
 | ||||
| out: | ||||
| 	sockopt_free_buf(&ctx); | ||||
| 	return ret; | ||||
| } | ||||
| EXPORT_SYMBOL(__cgroup_bpf_run_filter_getsockopt); | ||||
| 
 | ||||
| static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp, | ||||
| 			      size_t *lenp) | ||||
| { | ||||
|  | @ -1198,3 +1381,153 @@ const struct bpf_verifier_ops cg_sysctl_verifier_ops = { | |||
| 
 | ||||
| const struct bpf_prog_ops cg_sysctl_prog_ops = { | ||||
| }; | ||||
| 
 | ||||
| static const struct bpf_func_proto * | ||||
| cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | ||||
| { | ||||
| 	switch (func_id) { | ||||
| 	case BPF_FUNC_sk_storage_get: | ||||
| 		return &bpf_sk_storage_get_proto; | ||||
| 	case BPF_FUNC_sk_storage_delete: | ||||
| 		return &bpf_sk_storage_delete_proto; | ||||
| #ifdef CONFIG_INET | ||||
| 	case BPF_FUNC_tcp_sock: | ||||
| 		return &bpf_tcp_sock_proto; | ||||
| #endif | ||||
| 	default: | ||||
| 		return cgroup_base_func_proto(func_id, prog); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static bool cg_sockopt_is_valid_access(int off, int size, | ||||
| 				       enum bpf_access_type type, | ||||
| 				       const struct bpf_prog *prog, | ||||
| 				       struct bpf_insn_access_aux *info) | ||||
| { | ||||
| 	const int size_default = sizeof(__u32); | ||||
| 
 | ||||
| 	if (off < 0 || off >= sizeof(struct bpf_sockopt)) | ||||
| 		return false; | ||||
| 
 | ||||
| 	if (off % size != 0) | ||||
| 		return false; | ||||
| 
 | ||||
| 	if (type == BPF_WRITE) { | ||||
| 		switch (off) { | ||||
| 		case offsetof(struct bpf_sockopt, retval): | ||||
| 			if (size != size_default) | ||||
| 				return false; | ||||
| 			return prog->expected_attach_type == | ||||
| 				BPF_CGROUP_GETSOCKOPT; | ||||
| 		case offsetof(struct bpf_sockopt, optname): | ||||
| 			/* fallthrough */ | ||||
| 		case offsetof(struct bpf_sockopt, level): | ||||
| 			if (size != size_default) | ||||
| 				return false; | ||||
| 			return prog->expected_attach_type == | ||||
| 				BPF_CGROUP_SETSOCKOPT; | ||||
| 		case offsetof(struct bpf_sockopt, optlen): | ||||
| 			return size == size_default; | ||||
| 		default: | ||||
| 			return false; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	switch (off) { | ||||
| 	case offsetof(struct bpf_sockopt, sk): | ||||
| 		if (size != sizeof(__u64)) | ||||
| 			return false; | ||||
| 		info->reg_type = PTR_TO_SOCKET; | ||||
| 		break; | ||||
| 	case offsetof(struct bpf_sockopt, optval): | ||||
| 		if (size != sizeof(__u64)) | ||||
| 			return false; | ||||
| 		info->reg_type = PTR_TO_PACKET; | ||||
| 		break; | ||||
| 	case offsetof(struct bpf_sockopt, optval_end): | ||||
| 		if (size != sizeof(__u64)) | ||||
| 			return false; | ||||
| 		info->reg_type = PTR_TO_PACKET_END; | ||||
| 		break; | ||||
| 	case offsetof(struct bpf_sockopt, retval): | ||||
| 		if (size != size_default) | ||||
| 			return false; | ||||
| 		return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT; | ||||
| 	default: | ||||
| 		if (size != size_default) | ||||
| 			return false; | ||||
| 		break; | ||||
| 	} | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| #define CG_SOCKOPT_ACCESS_FIELD(T, F)					\ | ||||
| 	T(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F),			\ | ||||
| 	  si->dst_reg, si->src_reg,					\ | ||||
| 	  offsetof(struct bpf_sockopt_kern, F)) | ||||
| 
 | ||||
| static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, | ||||
| 					 const struct bpf_insn *si, | ||||
| 					 struct bpf_insn *insn_buf, | ||||
| 					 struct bpf_prog *prog, | ||||
| 					 u32 *target_size) | ||||
| { | ||||
| 	struct bpf_insn *insn = insn_buf; | ||||
| 
 | ||||
| 	switch (si->off) { | ||||
| 	case offsetof(struct bpf_sockopt, sk): | ||||
| 		*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, sk); | ||||
| 		break; | ||||
| 	case offsetof(struct bpf_sockopt, level): | ||||
| 		if (type == BPF_WRITE) | ||||
| 			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, level); | ||||
| 		else | ||||
| 			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, level); | ||||
| 		break; | ||||
| 	case offsetof(struct bpf_sockopt, optname): | ||||
| 		if (type == BPF_WRITE) | ||||
| 			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optname); | ||||
| 		else | ||||
| 			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optname); | ||||
| 		break; | ||||
| 	case offsetof(struct bpf_sockopt, optlen): | ||||
| 		if (type == BPF_WRITE) | ||||
| 			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optlen); | ||||
| 		else | ||||
| 			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen); | ||||
| 		break; | ||||
| 	case offsetof(struct bpf_sockopt, retval): | ||||
| 		if (type == BPF_WRITE) | ||||
| 			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval); | ||||
| 		else | ||||
| 			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval); | ||||
| 		break; | ||||
| 	case offsetof(struct bpf_sockopt, optval): | ||||
| 		*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval); | ||||
| 		break; | ||||
| 	case offsetof(struct bpf_sockopt, optval_end): | ||||
| 		*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval_end); | ||||
| 		break; | ||||
| 	} | ||||
| 
 | ||||
| 	return insn - insn_buf; | ||||
| } | ||||
| 
 | ||||
| static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf, | ||||
| 				   bool direct_write, | ||||
| 				   const struct bpf_prog *prog) | ||||
| { | ||||
| 	/* Nothing to do for sockopt argument. The data is kzalloc'ated.
 | ||||
| 	 */ | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| const struct bpf_verifier_ops cg_sockopt_verifier_ops = { | ||||
| 	.get_func_proto		= cg_sockopt_func_proto, | ||||
| 	.is_valid_access	= cg_sockopt_is_valid_access, | ||||
| 	.convert_ctx_access	= cg_sockopt_convert_ctx_access, | ||||
| 	.gen_prologue		= cg_sockopt_get_prologue, | ||||
| }; | ||||
| 
 | ||||
| const struct bpf_prog_ops cg_sockopt_prog_ops = { | ||||
| }; | ||||
|  |  | |||
|  | @ -1809,6 +1809,15 @@ int bpf_prog_array_length(struct bpf_prog_array *array) | |||
| 	return cnt; | ||||
| } | ||||
| 
 | ||||
| bool bpf_prog_array_is_empty(struct bpf_prog_array *array) | ||||
| { | ||||
| 	struct bpf_prog_array_item *item; | ||||
| 
 | ||||
| 	for (item = array->items; item->prog; item++) | ||||
| 		if (item->prog != &dummy_bpf_prog.prog) | ||||
| 			return false; | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| static bool bpf_prog_array_copy_core(struct bpf_prog_array *array, | ||||
| 				     u32 *prog_ids, | ||||
|  |  | |||
|  | @ -1590,6 +1590,14 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, | |||
| 		default: | ||||
| 			return -EINVAL; | ||||
| 		} | ||||
| 	case BPF_PROG_TYPE_CGROUP_SOCKOPT: | ||||
| 		switch (expected_attach_type) { | ||||
| 		case BPF_CGROUP_SETSOCKOPT: | ||||
| 		case BPF_CGROUP_GETSOCKOPT: | ||||
| 			return 0; | ||||
| 		default: | ||||
| 			return -EINVAL; | ||||
| 		} | ||||
| 	default: | ||||
| 		return 0; | ||||
| 	} | ||||
|  | @ -1840,6 +1848,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, | |||
| 	switch (prog->type) { | ||||
| 	case BPF_PROG_TYPE_CGROUP_SOCK: | ||||
| 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: | ||||
| 	case BPF_PROG_TYPE_CGROUP_SOCKOPT: | ||||
| 		return attach_type == prog->expected_attach_type ? 0 : -EINVAL; | ||||
| 	case BPF_PROG_TYPE_CGROUP_SKB: | ||||
| 		return prog->enforce_expected_attach_type && | ||||
|  | @ -1912,6 +1921,10 @@ static int bpf_prog_attach(const union bpf_attr *attr) | |||
| 	case BPF_CGROUP_SYSCTL: | ||||
| 		ptype = BPF_PROG_TYPE_CGROUP_SYSCTL; | ||||
| 		break; | ||||
| 	case BPF_CGROUP_GETSOCKOPT: | ||||
| 	case BPF_CGROUP_SETSOCKOPT: | ||||
| 		ptype = BPF_PROG_TYPE_CGROUP_SOCKOPT; | ||||
| 		break; | ||||
| 	default: | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
|  | @ -1995,6 +2008,10 @@ static int bpf_prog_detach(const union bpf_attr *attr) | |||
| 	case BPF_CGROUP_SYSCTL: | ||||
| 		ptype = BPF_PROG_TYPE_CGROUP_SYSCTL; | ||||
| 		break; | ||||
| 	case BPF_CGROUP_GETSOCKOPT: | ||||
| 	case BPF_CGROUP_SETSOCKOPT: | ||||
| 		ptype = BPF_PROG_TYPE_CGROUP_SOCKOPT; | ||||
| 		break; | ||||
| 	default: | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
|  | @ -2031,6 +2048,8 @@ static int bpf_prog_query(const union bpf_attr *attr, | |||
| 	case BPF_CGROUP_SOCK_OPS: | ||||
| 	case BPF_CGROUP_DEVICE: | ||||
| 	case BPF_CGROUP_SYSCTL: | ||||
| 	case BPF_CGROUP_GETSOCKOPT: | ||||
| 	case BPF_CGROUP_SETSOCKOPT: | ||||
| 		break; | ||||
| 	case BPF_LIRC_MODE2: | ||||
| 		return lirc_prog_query(attr, uattr); | ||||
|  |  | |||
|  | @ -2215,6 +2215,13 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, | |||
| 
 | ||||
| 		env->seen_direct_write = true; | ||||
| 		return true; | ||||
| 
 | ||||
| 	case BPF_PROG_TYPE_CGROUP_SOCKOPT: | ||||
| 		if (t == BPF_WRITE) | ||||
| 			env->seen_direct_write = true; | ||||
| 
 | ||||
| 		return true; | ||||
| 
 | ||||
| 	default: | ||||
| 		return false; | ||||
| 	} | ||||
|  | @ -6066,6 +6073,7 @@ static int check_return_code(struct bpf_verifier_env *env) | |||
| 	case BPF_PROG_TYPE_SOCK_OPS: | ||||
| 	case BPF_PROG_TYPE_CGROUP_DEVICE: | ||||
| 	case BPF_PROG_TYPE_CGROUP_SYSCTL: | ||||
| 	case BPF_PROG_TYPE_CGROUP_SOCKOPT: | ||||
| 		break; | ||||
| 	default: | ||||
| 		return 0; | ||||
|  |  | |||
|  | @ -5651,7 +5651,7 @@ BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) | |||
| 	return (unsigned long)NULL; | ||||
| } | ||||
| 
 | ||||
| static const struct bpf_func_proto bpf_tcp_sock_proto = { | ||||
| const struct bpf_func_proto bpf_tcp_sock_proto = { | ||||
| 	.func		= bpf_tcp_sock, | ||||
| 	.gpl_only	= false, | ||||
| 	.ret_type	= RET_PTR_TO_TCP_SOCK_OR_NULL, | ||||
|  |  | |||
							
								
								
									
										30
									
								
								net/socket.c
									
									
									
									
									
								
							
							
						
						
									
										30
									
								
								net/socket.c
									
									
									
									
									
								
							|  | @ -2051,6 +2051,8 @@ SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size, | |||
| static int __sys_setsockopt(int fd, int level, int optname, | ||||
| 			    char __user *optval, int optlen) | ||||
| { | ||||
| 	mm_segment_t oldfs = get_fs(); | ||||
| 	char *kernel_optval = NULL; | ||||
| 	int err, fput_needed; | ||||
| 	struct socket *sock; | ||||
| 
 | ||||
|  | @ -2063,6 +2065,22 @@ static int __sys_setsockopt(int fd, int level, int optname, | |||
| 		if (err) | ||||
| 			goto out_put; | ||||
| 
 | ||||
| 		err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, | ||||
| 						     &optname, optval, &optlen, | ||||
| 						     &kernel_optval); | ||||
| 
 | ||||
| 		if (err < 0) { | ||||
| 			goto out_put; | ||||
| 		} else if (err > 0) { | ||||
| 			err = 0; | ||||
| 			goto out_put; | ||||
| 		} | ||||
| 
 | ||||
| 		if (kernel_optval) { | ||||
| 			set_fs(KERNEL_DS); | ||||
| 			optval = (char __user __force *)kernel_optval; | ||||
| 		} | ||||
| 
 | ||||
| 		if (level == SOL_SOCKET) | ||||
| 			err = | ||||
| 			    sock_setsockopt(sock, level, optname, optval, | ||||
|  | @ -2071,6 +2089,11 @@ static int __sys_setsockopt(int fd, int level, int optname, | |||
| 			err = | ||||
| 			    sock->ops->setsockopt(sock, level, optname, optval, | ||||
| 						  optlen); | ||||
| 
 | ||||
| 		if (kernel_optval) { | ||||
| 			set_fs(oldfs); | ||||
| 			kfree(kernel_optval); | ||||
| 		} | ||||
| out_put: | ||||
| 		fput_light(sock->file, fput_needed); | ||||
| 	} | ||||
|  | @ -2093,6 +2116,7 @@ static int __sys_getsockopt(int fd, int level, int optname, | |||
| { | ||||
| 	int err, fput_needed; | ||||
| 	struct socket *sock; | ||||
| 	int max_optlen; | ||||
| 
 | ||||
| 	sock = sockfd_lookup_light(fd, &err, &fput_needed); | ||||
| 	if (sock != NULL) { | ||||
|  | @ -2100,6 +2124,8 @@ static int __sys_getsockopt(int fd, int level, int optname, | |||
| 		if (err) | ||||
| 			goto out_put; | ||||
| 
 | ||||
| 		max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen); | ||||
| 
 | ||||
| 		if (level == SOL_SOCKET) | ||||
| 			err = | ||||
| 			    sock_getsockopt(sock, level, optname, optval, | ||||
|  | @ -2108,6 +2134,10 @@ static int __sys_getsockopt(int fd, int level, int optname, | |||
| 			err = | ||||
| 			    sock->ops->getsockopt(sock, level, optname, optval, | ||||
| 						  optlen); | ||||
| 
 | ||||
| 		err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname, | ||||
| 						     optval, optlen, | ||||
| 						     max_optlen, err); | ||||
| out_put: | ||||
| 		fput_light(sock->file, fput_needed); | ||||
| 	} | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Stanislav Fomichev
						Stanislav Fomichev