forked from mirrors/linux
		
	sk-filter: Add ability to get socket filter program (v2)
The SO_ATTACH_FILTER option can currently only be set. I propose to add the ability to get the filter back by using SO_ATTACH_FILTER in getsockopt. To be less irritating to the eyes, the SO_GET_FILTER alias for it is declared. This ability is required by the checkpoint-restore project to be able to save the full state of a socket. There are two issues with getting the filter back. First, the kernel modifies sock_filter->code on filter load, thus in order to return the filter element back to the user we have to decode it into user-visible constants. Fortunately the modification in question is reversible. Second, the BPF_S_ALU_DIV_K code modifies the command argument k to speed up the run-time division by doing kernel_k = reciprocal(user_k). The bad news is that different user_k values may result in the same kernel_k, so we can't get the original user_k back. The good news is that we don't have to. What we need to do is calculate a user2_k such that reciprocal(user2_k) == reciprocal(user_k) == kernel_k, i.e. if it is loaded again, the recompiled value will be exactly the same as it was. That said, user2_k can be calculated as user2_k = reciprocal(kernel_k), with the exception that if kernel_k == 0, then user2_k == 1. The optlen argument is treated like this: when zero, the kernel returns the number of sock_filter elements in the filter; otherwise it should be large enough for the sock_filter array. Changes since v1: * Declared SO_GET_FILTER in all arch headers * Added decode of vlan-tag codes Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									96442e4242
								
							
						
					
					
						commit
						a8fc927780
					
				
					 19 changed files with 153 additions and 0 deletions
				
			
		|  | @ -47,6 +47,7 @@ | |||
| /* Socket filtering */ | ||||
| #define SO_ATTACH_FILTER        26 | ||||
| #define SO_DETACH_FILTER        27 | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_PEERNAME		28 | ||||
| #define SO_TIMESTAMP		29 | ||||
|  |  | |||
|  | @ -40,6 +40,7 @@ | |||
| /* Socket filtering */ | ||||
| #define SO_ATTACH_FILTER        26 | ||||
| #define SO_DETACH_FILTER        27 | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_PEERNAME		28 | ||||
| #define SO_TIMESTAMP		29 | ||||
|  |  | |||
|  | @ -42,6 +42,7 @@ | |||
| /* Socket filtering */ | ||||
| #define SO_ATTACH_FILTER        26 | ||||
| #define SO_DETACH_FILTER        27 | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_PEERNAME		28 | ||||
| #define SO_TIMESTAMP           29 | ||||
|  |  | |||
|  | @ -40,6 +40,7 @@ | |||
| /* Socket filtering */ | ||||
| #define SO_ATTACH_FILTER        26 | ||||
| #define SO_DETACH_FILTER        27 | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_PEERNAME             28 | ||||
| #define SO_TIMESTAMP		29 | ||||
|  |  | |||
|  | @ -40,6 +40,7 @@ | |||
| /* Socket filtering */ | ||||
| #define SO_ATTACH_FILTER        26 | ||||
| #define SO_DETACH_FILTER        27 | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_PEERNAME             28 | ||||
| #define SO_TIMESTAMP		29 | ||||
|  |  | |||
|  | @ -49,6 +49,7 @@ | |||
| /* Socket filtering */ | ||||
| #define SO_ATTACH_FILTER	26 | ||||
| #define SO_DETACH_FILTER	27 | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_PEERNAME		28 | ||||
| #define SO_TIMESTAMP		29 | ||||
|  |  | |||
|  | @ -40,6 +40,7 @@ | |||
| /* Socket filtering */ | ||||
| #define SO_ATTACH_FILTER        26 | ||||
| #define SO_DETACH_FILTER        27 | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_PEERNAME		28 | ||||
| #define SO_TIMESTAMP		29 | ||||
|  |  | |||
|  | @ -40,6 +40,7 @@ | |||
| /* Socket filtering */ | ||||
| #define SO_ATTACH_FILTER        26 | ||||
| #define SO_DETACH_FILTER        27 | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_PEERNAME             28 | ||||
| #define SO_TIMESTAMP		29 | ||||
|  |  | |||
|  | @ -63,6 +63,7 @@ To add: #define SO_REUSEPORT 0x0200	/* Allow local address and port reuse.  */ | |||
| /* Socket filtering */ | ||||
| #define SO_ATTACH_FILTER        26 | ||||
| #define SO_DETACH_FILTER        27 | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_PEERNAME             28 | ||||
| #define SO_TIMESTAMP		29 | ||||
|  |  | |||
|  | @ -40,6 +40,7 @@ | |||
| /* Socket filtering */ | ||||
| #define SO_ATTACH_FILTER        26 | ||||
| #define SO_DETACH_FILTER        27 | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_PEERNAME		28 | ||||
| #define SO_TIMESTAMP		29 | ||||
|  |  | |||
|  | @ -48,6 +48,7 @@ | |||
| /* Socket filtering */ | ||||
| #define SO_ATTACH_FILTER        0x401a | ||||
| #define SO_DETACH_FILTER        0x401b | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_ACCEPTCONN		0x401c | ||||
| 
 | ||||
|  |  | |||
|  | @ -47,6 +47,7 @@ | |||
| /* Socket filtering */ | ||||
| #define SO_ATTACH_FILTER	26 | ||||
| #define SO_DETACH_FILTER	27 | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_PEERNAME		28 | ||||
| #define SO_TIMESTAMP		29 | ||||
|  |  | |||
|  | @ -46,6 +46,7 @@ | |||
| /* Socket filtering */ | ||||
| #define SO_ATTACH_FILTER        26 | ||||
| #define SO_DETACH_FILTER        27 | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_PEERNAME		28 | ||||
| #define SO_TIMESTAMP		29 | ||||
|  |  | |||
|  | @ -41,6 +41,7 @@ | |||
| 
 | ||||
| #define SO_ATTACH_FILTER	0x001a | ||||
| #define SO_DETACH_FILTER        0x001b | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_PEERNAME		0x001c | ||||
| #define SO_TIMESTAMP		0x001d | ||||
|  |  | |||
|  | @ -52,6 +52,7 @@ | |||
| 
 | ||||
| #define SO_ATTACH_FILTER        26 | ||||
| #define SO_DETACH_FILTER        27 | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_PEERNAME		28 | ||||
| #define SO_TIMESTAMP		29 | ||||
|  |  | |||
|  | @ -45,6 +45,7 @@ extern void sk_unattached_filter_destroy(struct sk_filter *fp); | |||
| extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); | ||||
| extern int sk_detach_filter(struct sock *sk); | ||||
| extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); | ||||
| extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len); | ||||
| 
 | ||||
| #ifdef CONFIG_BPF_JIT | ||||
| extern void bpf_jit_compile(struct sk_filter *fp); | ||||
|  |  | |||
|  | @ -43,6 +43,7 @@ | |||
| /* Socket filtering */ | ||||
| #define SO_ATTACH_FILTER	26 | ||||
| #define SO_DETACH_FILTER	27 | ||||
| #define SO_GET_FILTER		SO_ATTACH_FILTER | ||||
| 
 | ||||
| #define SO_PEERNAME		28 | ||||
| #define SO_TIMESTAMP		29 | ||||
|  |  | |||
|  | @ -760,3 +760,133 @@ int sk_detach_filter(struct sock *sk) | |||
| 	return ret; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(sk_detach_filter); | ||||
| 
 | ||||
| /*
 * sk_decode_filter - convert one in-kernel filter instruction back to
 * its user-visible form.
 * @filt: instruction as stored in the attached filter (code is a BPF_S_* value)
 * @to:   output instruction; code, jt, jf and k are all written
 *
 * The opcode is translated through the decodes[] table; jt and jf are
 * copied unchanged. k is copied unchanged too, except for BPF_S_ALU_DIV_K,
 * where the stored k is a reciprocal and is run through reciprocal_value()
 * once more to obtain a divisor that re-encodes to the same stored value.
 */
static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) | ||||
| { | ||||
| 	static const u16 decodes[] = { /* index: in-kernel BPF_S_* code, value: user-visible opcode */ | ||||
| 		[BPF_S_ALU_ADD_K]	= BPF_ALU|BPF_ADD|BPF_K, | ||||
| 		[BPF_S_ALU_ADD_X]	= BPF_ALU|BPF_ADD|BPF_X, | ||||
| 		[BPF_S_ALU_SUB_K]	= BPF_ALU|BPF_SUB|BPF_K, | ||||
| 		[BPF_S_ALU_SUB_X]	= BPF_ALU|BPF_SUB|BPF_X, | ||||
| 		[BPF_S_ALU_MUL_K]	= BPF_ALU|BPF_MUL|BPF_K, | ||||
| 		[BPF_S_ALU_MUL_X]	= BPF_ALU|BPF_MUL|BPF_X, | ||||
| 		[BPF_S_ALU_DIV_X]	= BPF_ALU|BPF_DIV|BPF_X, | ||||
| 		[BPF_S_ALU_MOD_K]	= BPF_ALU|BPF_MOD|BPF_K, | ||||
| 		[BPF_S_ALU_MOD_X]	= BPF_ALU|BPF_MOD|BPF_X, | ||||
| 		[BPF_S_ALU_AND_K]	= BPF_ALU|BPF_AND|BPF_K, | ||||
| 		[BPF_S_ALU_AND_X]	= BPF_ALU|BPF_AND|BPF_X, | ||||
| 		[BPF_S_ALU_OR_K]	= BPF_ALU|BPF_OR|BPF_K, | ||||
| 		[BPF_S_ALU_OR_X]	= BPF_ALU|BPF_OR|BPF_X, | ||||
| 		[BPF_S_ALU_XOR_K]	= BPF_ALU|BPF_XOR|BPF_K, | ||||
| 		[BPF_S_ALU_XOR_X]	= BPF_ALU|BPF_XOR|BPF_X, | ||||
| 		[BPF_S_ALU_LSH_K]	= BPF_ALU|BPF_LSH|BPF_K, | ||||
| 		[BPF_S_ALU_LSH_X]	= BPF_ALU|BPF_LSH|BPF_X, | ||||
| 		[BPF_S_ALU_RSH_K]	= BPF_ALU|BPF_RSH|BPF_K, | ||||
| 		[BPF_S_ALU_RSH_X]	= BPF_ALU|BPF_RSH|BPF_X, | ||||
| 		[BPF_S_ALU_NEG]		= BPF_ALU|BPF_NEG, | ||||
| 		[BPF_S_LD_W_ABS]	= BPF_LD|BPF_W|BPF_ABS, | ||||
| 		[BPF_S_LD_H_ABS]	= BPF_LD|BPF_H|BPF_ABS, | ||||
| 		[BPF_S_LD_B_ABS]	= BPF_LD|BPF_B|BPF_ABS, | ||||
| 		[BPF_S_ANC_PROTOCOL]	= BPF_LD|BPF_B|BPF_ABS, /* every BPF_S_ANC_* code decodes to this one opcode; k is copied as-is below */ | ||||
| 		[BPF_S_ANC_PKTTYPE]	= BPF_LD|BPF_B|BPF_ABS, | ||||
| 		[BPF_S_ANC_IFINDEX]	= BPF_LD|BPF_B|BPF_ABS, | ||||
| 		[BPF_S_ANC_NLATTR]	= BPF_LD|BPF_B|BPF_ABS, | ||||
| 		[BPF_S_ANC_NLATTR_NEST]	= BPF_LD|BPF_B|BPF_ABS, | ||||
| 		[BPF_S_ANC_MARK]	= BPF_LD|BPF_B|BPF_ABS, | ||||
| 		[BPF_S_ANC_QUEUE]	= BPF_LD|BPF_B|BPF_ABS, | ||||
| 		[BPF_S_ANC_HATYPE]	= BPF_LD|BPF_B|BPF_ABS, | ||||
| 		[BPF_S_ANC_RXHASH]	= BPF_LD|BPF_B|BPF_ABS, | ||||
| 		[BPF_S_ANC_CPU]		= BPF_LD|BPF_B|BPF_ABS, | ||||
| 		[BPF_S_ANC_ALU_XOR_X]	= BPF_LD|BPF_B|BPF_ABS, | ||||
| 		[BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, | ||||
| 		[BPF_S_ANC_VLAN_TAG]	= BPF_LD|BPF_B|BPF_ABS, | ||||
| 		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, | ||||
| 		[BPF_S_LD_W_LEN]	= BPF_LD|BPF_W|BPF_LEN, | ||||
| 		[BPF_S_LD_W_IND]	= BPF_LD|BPF_W|BPF_IND, | ||||
| 		[BPF_S_LD_H_IND]	= BPF_LD|BPF_H|BPF_IND, | ||||
| 		[BPF_S_LD_B_IND]	= BPF_LD|BPF_B|BPF_IND, | ||||
| 		[BPF_S_LD_IMM]		= BPF_LD|BPF_IMM, | ||||
| 		[BPF_S_LDX_W_LEN]	= BPF_LDX|BPF_W|BPF_LEN, | ||||
| 		[BPF_S_LDX_B_MSH]	= BPF_LDX|BPF_B|BPF_MSH, | ||||
| 		[BPF_S_LDX_IMM]		= BPF_LDX|BPF_IMM, | ||||
| 		[BPF_S_MISC_TAX]	= BPF_MISC|BPF_TAX, | ||||
| 		[BPF_S_MISC_TXA]	= BPF_MISC|BPF_TXA, | ||||
| 		[BPF_S_RET_K]		= BPF_RET|BPF_K, | ||||
| 		[BPF_S_RET_A]		= BPF_RET|BPF_A, | ||||
| 		[BPF_S_ALU_DIV_K]	= BPF_ALU|BPF_DIV|BPF_K, | ||||
| 		[BPF_S_LD_MEM]		= BPF_LD|BPF_MEM, | ||||
| 		[BPF_S_LDX_MEM]		= BPF_LDX|BPF_MEM, | ||||
| 		[BPF_S_ST]		= BPF_ST, | ||||
| 		[BPF_S_STX]		= BPF_STX, | ||||
| 		[BPF_S_JMP_JA]		= BPF_JMP|BPF_JA, | ||||
| 		[BPF_S_JMP_JEQ_K]	= BPF_JMP|BPF_JEQ|BPF_K, | ||||
| 		[BPF_S_JMP_JEQ_X]	= BPF_JMP|BPF_JEQ|BPF_X, | ||||
| 		[BPF_S_JMP_JGE_K]	= BPF_JMP|BPF_JGE|BPF_K, | ||||
| 		[BPF_S_JMP_JGE_X]	= BPF_JMP|BPF_JGE|BPF_X, | ||||
| 		[BPF_S_JMP_JGT_K]	= BPF_JMP|BPF_JGT|BPF_K, | ||||
| 		[BPF_S_JMP_JGT_X]	= BPF_JMP|BPF_JGT|BPF_X, | ||||
| 		[BPF_S_JMP_JSET_K]	= BPF_JMP|BPF_JSET|BPF_K, | ||||
| 		[BPF_S_JMP_JSET_X]	= BPF_JMP|BPF_JSET|BPF_X, | ||||
| 	}; | ||||
| 	u16 code; | ||||
| 
 | ||||
| 	code = filt->code; | ||||
| 
 | ||||
| 	to->code = decodes[code]; /* unchecked index. NOTE(review): assumes code is always a valid BPF_S_* value; confirm it is validated at attach time */ | ||||
| 	to->jt = filt->jt; | ||||
| 	to->jf = filt->jf; | ||||
| 
 | ||||
| 	if (code == BPF_S_ALU_DIV_K) { | ||||
| 		/*
 | ||||
| 		 * When loaded this rule user gave us X, which was | ||||
| 		 * translated into R = r(X). Now we calculate the | ||||
| 		 * RR = r(R) and report it back. If next time this | ||||
| 		 * value is loaded and RRR = r(RR) is calculated | ||||
| 		 * then the R == RRR will be true. | ||||
| 		 * | ||||
| 		 * One exception. X == 1 translates into R == 0 and | ||||
| 		 * we can't calculate RR out of it with r(). | ||||
| 		 */ | ||||
| 
 | ||||
| 		if (filt->k == 0) | ||||
| 			to->k = 1; | ||||
| 		else | ||||
| 			to->k = reciprocal_value(filt->k); | ||||
| 
 | ||||
| 		BUG_ON(reciprocal_value(to->k) != filt->k); /* the reported k must re-encode to exactly the stored k */ | ||||
| 	} else | ||||
| 		to->k = filt->k; | ||||
| } | ||||
| 
 | ||||
| /*
 * sk_get_filter - copy the filter attached to a socket out to user space.
 * @sk:   socket whose sk_filter is read; the socket lock is taken for the
 *        whole operation
 * @ubuf: user buffer that receives the decoded struct sock_filter entries
 * @len:  capacity of @ubuf counted in struct sock_filter elements (it is
 *        compared against filter->len); 0 means "only report the size"
 *
 * Returns 0 when no filter is attached, the number of instructions when
 * @len is 0 or when the copy succeeds, -EINVAL when @len is smaller than
 * the number of instructions, and -EFAULT when copy_to_user() fails.
 */
int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) | ||||
| { | ||||
| 	struct sk_filter *filter; | ||||
| 	int i, ret; | ||||
| 
 | ||||
| 	lock_sock(sk); /* released at the out: label on every path */ | ||||
| 	filter = rcu_dereference_protected(sk->sk_filter, | ||||
| 			sock_owned_by_user(sk)); | ||||
| 	ret = 0; /* ret is preloaded before each check so the following goto carries the right result */ | ||||
| 	if (!filter) | ||||
| 		goto out; | ||||
| 	ret = filter->len; | ||||
| 	if (!len) /* size query: report the instruction count without copying anything */ | ||||
| 		goto out; | ||||
| 	ret = -EINVAL; | ||||
| 	if (len < filter->len) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	ret = -EFAULT; /* result if a copy_to_user() below fails; earlier elements may already have been written */ | ||||
| 	for (i = 0; i < filter->len; i++) { | ||||
| 		struct sock_filter fb; | ||||
| 
 | ||||
| 		sk_decode_filter(&filter->insns[i], &fb); | ||||
| 		if (copy_to_user(&ubuf[i], &fb, sizeof(fb))) | ||||
| 			goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	ret = filter->len; | ||||
| out: | ||||
| 	release_sock(sk); | ||||
| 	return ret; | ||||
| } | ||||
|  |  | |||
|  | @ -1077,6 +1077,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname, | |||
| 	case SO_BINDTODEVICE: | ||||
| 		v.val = sk->sk_bound_dev_if; | ||||
| 		break; | ||||
| 	case SO_GET_FILTER: | ||||
| 		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len); | ||||
| 		if (len < 0) | ||||
| 			return len; | ||||
| 
 | ||||
| 		goto lenout; | ||||
| 	default: | ||||
| 		return -ENOPROTOOPT; | ||||
| 	} | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Pavel Emelyanov
						Pavel Emelyanov