forked from mirrors/linux
		
	net: add skeleton of bpfilter kernel module
bpfilter.ko consists of bpfilter_kern.c (normal kernel module code) and user mode helper code that is embedded into bpfilter.ko. The steps to build bpfilter.ko are the following: - main.c is compiled by HOSTCC into the bpfilter_umh ELF executable file - with quite a bit of objcopy and Makefile magic the bpfilter_umh ELF file is converted into the bpfilter_umh.o object file with _binary_net_bpfilter_bpfilter_umh_start and _end symbols. Example: $ nm ./bld_x64/net/bpfilter/bpfilter_umh.o 0000000000004cf8 T _binary_net_bpfilter_bpfilter_umh_end 0000000000004cf8 A _binary_net_bpfilter_bpfilter_umh_size 0000000000000000 T _binary_net_bpfilter_bpfilter_umh_start - bpfilter_umh.o and bpfilter_kern.o are linked together into bpfilter.ko. bpfilter_kern.c is normal kernel module code that calls the fork_usermode_blob() helper to execute part of its own data as a user mode process. Notice that the _binary_net_bpfilter_bpfilter_umh_start - end range is placed into the .init.rodata section, so it is freed as soon as the __init function of bpfilter.ko has finished. As part of __init, bpfilter.ko performs a first request/reply exchange via the two unix pipes provided by the fork_usermode_blob() helper to make sure that the umh is healthy. If it is not, bpfilter.ko will kill it via its pid. Later bpfilter_process_sockopt() will be called from the bpfilter hooks in get/setsockopt() to pass iptables commands into the umh via bpfilter.ko. If the admin does 'rmmod bpfilter', the __exit code of bpfilter.ko will kill the umh as well. Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									449325b52b
								
							
						
					
					
						commit
						d2ba09c17a
					
				
					 13 changed files with 339 additions and 0 deletions
				
			
		
							
								
								
									
										15
									
								
								include/linux/bpfilter.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								include/linux/bpfilter.h
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,15 @@ | |||
| /* SPDX-License-Identifier: GPL-2.0 */ | ||||
| #ifndef _LINUX_BPFILTER_H | ||||
| #define _LINUX_BPFILTER_H | ||||
| 
 | ||||
| #include <uapi/linux/bpfilter.h> | ||||
| 
 | ||||
| struct sock; | ||||
| /* Forward a "set" socket option to bpfilter; optval is a user pointer. */ | ||||
| int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, | ||||
| 			    unsigned int optlen); | ||||
| /* Forward a "get" socket option; optval and optlen are user pointers. */ | ||||
| int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, | ||||
| 			    int __user *optlen); | ||||
| /* Handler hook set by the bpfilter module; NULL while none is registered. */ | ||||
| extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname, | ||||
| 				       char __user *optval, | ||||
| 				       unsigned int optlen, bool is_set); | ||||
| #endif | ||||
							
								
								
									
										21
									
								
								include/uapi/linux/bpfilter.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								include/uapi/linux/bpfilter.h
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,21 @@ | |||
| /* SPDX-License-Identifier: GPL-2.0 */ | ||||
| #ifndef _UAPI_LINUX_BPFILTER_H | ||||
| #define _UAPI_LINUX_BPFILTER_H | ||||
| 
 | ||||
| #include <linux/if.h> | ||||
| 
 | ||||
| enum { | ||||
| 	BPFILTER_IPT_SO_SET_REPLACE = 64,	/* lowest set option routed to bpfilter */ | ||||
| 	BPFILTER_IPT_SO_SET_ADD_COUNTERS = 65, | ||||
| 	BPFILTER_IPT_SET_MAX,	/* exclusive upper bound of the set option range */ | ||||
| }; | ||||
| 
 | ||||
| enum { | ||||
| 	BPFILTER_IPT_SO_GET_INFO = 64,	/* lowest get option routed to bpfilter */ | ||||
| 	BPFILTER_IPT_SO_GET_ENTRIES = 65, | ||||
| 	BPFILTER_IPT_SO_GET_REVISION_MATCH = 66, | ||||
| 	BPFILTER_IPT_SO_GET_REVISION_TARGET = 67, | ||||
| 	BPFILTER_IPT_GET_MAX,	/* exclusive upper bound of the get option range */ | ||||
| }; | ||||
| 
 | ||||
| #endif /* _UAPI_LINUX_BPFILTER_H */ | ||||
|  | @ -202,6 +202,8 @@ source "net/bridge/netfilter/Kconfig" | |||
| 
 | ||||
| endif | ||||
| 
 | ||||
| source "net/bpfilter/Kconfig" | ||||
| 
 | ||||
| source "net/dccp/Kconfig" | ||||
| source "net/sctp/Kconfig" | ||||
| source "net/rds/Kconfig" | ||||
|  |  | |||
|  | @ -20,6 +20,7 @@ obj-$(CONFIG_TLS)		+= tls/ | |||
| obj-$(CONFIG_XFRM)		+= xfrm/ | ||||
| obj-$(CONFIG_UNIX)		+= unix/ | ||||
| obj-$(CONFIG_NET)		+= ipv6/ | ||||
| obj-$(CONFIG_BPFILTER)		+= bpfilter/ | ||||
| obj-$(CONFIG_PACKET)		+= packet/ | ||||
| obj-$(CONFIG_NET_KEY)		+= key/ | ||||
| obj-$(CONFIG_BRIDGE)		+= bridge/ | ||||
|  |  | |||
							
								
								
									
										16
									
								
								net/bpfilter/Kconfig
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								net/bpfilter/Kconfig
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,16 @@ | |||
| menuconfig BPFILTER | ||||
| 	bool "BPF based packet filtering framework (BPFILTER)" | ||||
| 	default n | ||||
| 	depends on NET && BPF | ||||
| 	help | ||||
| 	  This builds the experimental bpfilter framework, which aims to | ||||
| 	  provide netfilter-compatible functionality via BPF. | ||||
| 
 | ||||
| if BPFILTER | ||||
| config BPFILTER_UMH | ||||
| 	tristate "bpfilter kernel module with user mode helper" | ||||
| 	default m | ||||
| 	help | ||||
| 	  This builds the bpfilter kernel module with an embedded user mode helper. | ||||
| endif | ||||
| 
 | ||||
							
								
								
									
										30
									
								
								net/bpfilter/Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								net/bpfilter/Makefile
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,30 @@ | |||
| # SPDX-License-Identifier: GPL-2.0
 | ||||
| #
 | ||||
| # Makefile for the Linux BPFILTER layer.
 | ||||
| #
 | ||||
| 
 | ||||
| hostprogs-y := bpfilter_umh | ||||
| bpfilter_umh-objs := main.o | ||||
| HOSTCFLAGS += -I. -Itools/include/ | ||||
| ifeq ($(CONFIG_BPFILTER_UMH), y) | ||||
| # The built-in bpfilter_umh must be linked with -static: rootfs is not
 | ||||
| # mounted yet when the __init function is called, so do_execv would not
 | ||||
| # find the ELF interpreter.
 | ||||
| HOSTLDFLAGS += -static | ||||
| endif | ||||
| 
 | ||||
| # a bit of elf magic to convert bpfilter_umh binary into a binary blob
 | ||||
| # inside bpfilter_umh.o elf file referenced by
 | ||||
| # _binary_net_bpfilter_bpfilter_umh_start symbol
 | ||||
| # which bpfilter_kern.c passes further into umh blob loader at run-time
 | ||||
| quiet_cmd_copy_umh = GEN $@ | ||||
|       cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \
 | ||||
|       $(OBJCOPY) -I binary -O $(CONFIG_OUTPUT_FORMAT) \
 | ||||
|       -B `$(OBJDUMP) -f $<|grep architecture|cut -d, -f1|cut -d' ' -f2` \
 | ||||
|       --rename-section .data=.init.rodata $< $@ | ||||
| 
 | ||||
| $(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh | ||||
| 	$(call cmd,copy_umh) | ||||
| 
 | ||||
| obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o | ||||
| bpfilter-objs += bpfilter_kern.o bpfilter_umh.o | ||||
							
								
								
									
										111
									
								
								net/bpfilter/bpfilter_kern.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										111
									
								
								net/bpfilter/bpfilter_kern.c
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,111 @@ | |||
| // SPDX-License-Identifier: GPL-2.0
 | ||||
| #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||||
| #include <linux/init.h> | ||||
| #include <linux/module.h> | ||||
| #include <linux/umh.h> | ||||
| #include <linux/bpfilter.h> | ||||
| #include <linux/sched.h> | ||||
| #include <linux/sched/signal.h> | ||||
| #include <linux/fs.h> | ||||
| #include <linux/file.h> | ||||
| #include "msgfmt.h" | ||||
| 
 | ||||
| #define UMH_start _binary_net_bpfilter_bpfilter_umh_start | ||||
| #define UMH_end _binary_net_bpfilter_bpfilter_umh_end | ||||
| 
 | ||||
| extern char UMH_start; | ||||
| extern char UMH_end; | ||||
| 
 | ||||
| static struct umh_info info; | ||||
| /* since ip_getsockopt() can run in parallel, serialize access to umh */ | ||||
| static DEFINE_MUTEX(bpfilter_lock); | ||||
| 
 | ||||
| /* Kill the user mode helper identified by info->pid and drop both pipe files. | ||||
|  * | ||||
|  * get_pid_task() resolves the task inside an RCU read section and returns it | ||||
|  * with a reference held, so the task cannot be freed while it is signalled. | ||||
|  * The reference is dropped again right after the signal has been sent. | ||||
|  */ | ||||
| static void shutdown_umh(struct umh_info *info) | ||||
| { | ||||
| 	struct task_struct *tsk; | ||||
| 
 | ||||
| 	tsk = get_pid_task(find_vpid(info->pid), PIDTYPE_PID); | ||||
| 	if (tsk) { | ||||
| 		force_sig(SIGKILL, tsk); | ||||
| 		put_task_struct(tsk); | ||||
| 	} | ||||
| 	fput(info->pipe_to_umh); | ||||
| 	fput(info->pipe_from_umh); | ||||
| } | ||||
| 
 | ||||
| /* Unregister the sockopt hook and shut the helper down. | ||||
|  * | ||||
|  * Does nothing when no hook is registered. Every caller in this file holds | ||||
|  * bpfilter_lock while calling it. | ||||
|  */ | ||||
| static void __stop_umh(void) | ||||
| { | ||||
| 	if (!bpfilter_process_sockopt) | ||||
| 		return; | ||||
| 
 | ||||
| 	bpfilter_process_sockopt = NULL; | ||||
| 	shutdown_umh(&info); | ||||
| } | ||||
| 
 | ||||
| /* Variant of __stop_umh() that takes bpfilter_lock itself. */ | ||||
| static void stop_umh(void) | ||||
| { | ||||
| 	mutex_lock(&bpfilter_lock); | ||||
| 	__stop_umh(); | ||||
| 	mutex_unlock(&bpfilter_lock); | ||||
| } | ||||
| 
 | ||||
| /* Send one sockopt request to the user mode helper and wait for its reply. | ||||
|  * | ||||
|  * @sk:      socket the option was issued on (not used here) | ||||
|  * @optname: option number, passed to the helper as the command | ||||
|  * @optval:  user pointer, passed to the helper as a plain address | ||||
|  * @optlen:  length that belongs to @optval | ||||
|  * @is_set:  true for a set request, false for a get request | ||||
|  * | ||||
|  * The exchange is serialized with bpfilter_lock. Returns the status reported | ||||
|  * by the helper, or -EFAULT after a short or failed pipe transfer, in which | ||||
|  * case the helper is stopped via __stop_umh(). | ||||
|  */ | ||||
| static int __bpfilter_process_sockopt(struct sock *sk, int optname, | ||||
| 				      char __user *optval, | ||||
| 				      unsigned int optlen, bool is_set) | ||||
| { | ||||
| 	struct mbox_request req = { | ||||
| 		.is_set = is_set, | ||||
| 		.pid = current->pid, | ||||
| 		.cmd = optname, | ||||
| 		/* unsigned cast: zero-extend the pointer into the __u64 field */ | ||||
| 		.addr = (unsigned long)optval, | ||||
| 		.len = optlen, | ||||
| 	}; | ||||
| 	struct mbox_reply reply; | ||||
| 	loff_t pos = 0;	/* must not be handed to __kernel_write() uninitialised */ | ||||
| 	ssize_t n; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	mutex_lock(&bpfilter_lock); | ||||
| 	n = __kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos); | ||||
| 	if (n != sizeof(req)) { | ||||
| 		pr_err("write fail %zd\n", n); | ||||
| 		__stop_umh(); | ||||
| 		ret = -EFAULT; | ||||
| 		goto out; | ||||
| 	} | ||||
| 	pos = 0; | ||||
| 	n = kernel_read(info.pipe_from_umh, &reply, sizeof(reply), &pos); | ||||
| 	if (n != sizeof(reply)) { | ||||
| 		pr_err("read fail %zd\n", n); | ||||
| 		__stop_umh(); | ||||
| 		ret = -EFAULT; | ||||
| 		goto out; | ||||
| 	} | ||||
| 	ret = reply.status; | ||||
| out: | ||||
| 	mutex_unlock(&bpfilter_lock); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /* Module init: start the embedded user mode helper and register the hook. | ||||
|  * | ||||
|  * Returns 0 on success, the fork_usermode_blob() error if the helper could | ||||
|  * not be started, or -EFAULT if the helper fails the initial health check. | ||||
|  */ | ||||
| static int __init load_umh(void) | ||||
| { | ||||
| 	int err; | ||||
| 
 | ||||
| 	/* fork usermode process */ | ||||
| 	err = fork_usermode_blob(&UMH_start, &UMH_end - &UMH_start, &info); | ||||
| 	if (err) | ||||
| 		return err; | ||||
| 	pr_info("Loaded bpfilter_umh pid %d\n", info.pid); | ||||
| 
 | ||||
| 	/* health check that usermode process started correctly */ | ||||
| 	if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, false) != 0) { | ||||
| 		/* The hook is not registered yet, so stop_umh() would do | ||||
| 		 * nothing here; shut the helper down directly so that the | ||||
| 		 * process and both pipe files are not leaked. | ||||
| 		 */ | ||||
| 		shutdown_umh(&info); | ||||
| 		return -EFAULT; | ||||
| 	} | ||||
| 	bpfilter_process_sockopt = &__bpfilter_process_sockopt; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static void __exit fini_umh(void) | ||||
| { | ||||
| 	stop_umh();	/* module exit: unregister the hook and stop the helper */ | ||||
| } | ||||
| module_init(load_umh); | ||||
| module_exit(fini_umh); | ||||
| MODULE_LICENSE("GPL"); | ||||
							
								
								
									
										63
									
								
								net/bpfilter/main.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								net/bpfilter/main.c
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,63 @@ | |||
| // SPDX-License-Identifier: GPL-2.0
 | ||||
| #define _GNU_SOURCE | ||||
| #include <sys/uio.h> | ||||
| #include <errno.h> | ||||
| #include <stdio.h> | ||||
| #include <sys/socket.h> | ||||
| #include <fcntl.h> | ||||
| #include <unistd.h> | ||||
| #include "include/uapi/linux/bpf.h" | ||||
| #include <asm/unistd.h> | ||||
| #include "msgfmt.h" | ||||
| 
 | ||||
| int debug_fd; | ||||
| 
 | ||||
| /* Handle a "get" request. Command 0 succeeds (the kernel module issues it | ||||
|  * as its start-up health check); every other command is rejected with | ||||
|  * -ENOPROTOOPT. | ||||
|  */ | ||||
| static int handle_get_cmd(struct mbox_request *cmd) | ||||
| { | ||||
| 	if (cmd->cmd == 0) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	return -ENOPROTOOPT; | ||||
| } | ||||
| 
 | ||||
| static int handle_set_cmd(struct mbox_request *cmd) | ||||
| { | ||||
| 	return -ENOPROTOOPT;	/* every set command is rejected here */ | ||||
| } | ||||
| 
 | ||||
| /* Serve requests: read one mbox_request from fd 0, dispatch it on is_set, | ||||
|  * and write the mbox_reply to fd 1. Returns after the first short or failed | ||||
|  * read or write, logging the byte count to debug_fd. | ||||
|  */ | ||||
| static void loop(void) | ||||
| { | ||||
| 	struct mbox_request req; | ||||
| 	struct mbox_reply reply; | ||||
| 	int n; | ||||
| 
 | ||||
| 	for (;;) { | ||||
| 		n = read(0, &req, sizeof(req)); | ||||
| 		if (n != sizeof(req)) { | ||||
| 			dprintf(debug_fd, "invalid request %d\n", n); | ||||
| 			break; | ||||
| 		} | ||||
| 
 | ||||
| 		if (req.is_set) | ||||
| 			reply.status = handle_set_cmd(&req); | ||||
| 		else | ||||
| 			reply.status = handle_get_cmd(&req); | ||||
| 
 | ||||
| 		n = write(1, &reply, sizeof(reply)); | ||||
| 		if (n != sizeof(reply)) { | ||||
| 			dprintf(debug_fd, "reply failed %d\n", n); | ||||
| 			break; | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /* Entry point of the user mode helper: open the console for debug output, | ||||
|  * serve requests until the pipes fail, then exit. | ||||
|  */ | ||||
| int main(void) | ||||
| { | ||||
| 	/* Use the <fcntl.h> names instead of raw octal values, which differ | ||||
| 	 * between architectures, and pass the mode that O_CREAT requires. | ||||
| 	 */ | ||||
| 	debug_fd = open("/dev/console", O_RDWR | O_CREAT, 0600); | ||||
| 	dprintf(debug_fd, "Started bpfilter\n"); | ||||
| 	loop(); | ||||
| 	close(debug_fd); | ||||
| 	return 0; | ||||
| } | ||||
							
								
								
									
										17
									
								
								net/bpfilter/msgfmt.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								net/bpfilter/msgfmt.h
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,17 @@ | |||
| /* SPDX-License-Identifier: GPL-2.0 */ | ||||
| #ifndef _NET_BPFILTER_MSGFMT_H | ||||
| #define _NET_BPFILTER_MSGFMT_H | ||||
| 
 | ||||
| struct mbox_request {	/* request written by the kernel module to the helper */ | ||||
| 	__u64 addr;	/* optval user pointer, as an integer */ | ||||
| 	__u32 len;	/* optlen */ | ||||
| 	__u32 is_set;	/* non-zero for a set request, zero for a get request */ | ||||
| 	__u32 cmd;	/* optname */ | ||||
| 	__u32 pid;	/* pid of the task that issued the sockopt call */ | ||||
| }; | ||||
| 
 | ||||
| struct mbox_reply {	/* reply written by the helper back to the kernel module */ | ||||
| 	__u32 status;	/* handler result; returned to the sockopt caller */ | ||||
| }; | ||||
| 
 | ||||
| #endif | ||||
|  | @ -16,6 +16,8 @@ obj-y     := route.o inetpeer.o protocol.o \ | |||
| 	     inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
 | ||||
| 	     metrics.o | ||||
| 
 | ||||
| obj-$(CONFIG_BPFILTER) += bpfilter/ | ||||
| 
 | ||||
| obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o | ||||
| obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o | ||||
| obj-$(CONFIG_PROC_FS) += proc.o | ||||
|  |  | |||
							
								
								
									
										2
									
								
								net/ipv4/bpfilter/Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								net/ipv4/bpfilter/Makefile
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,2 @@ | |||
| obj-$(CONFIG_BPFILTER) += sockopt.o | ||||
| 
 | ||||
							
								
								
									
										42
									
								
								net/ipv4/bpfilter/sockopt.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								net/ipv4/bpfilter/sockopt.c
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,42 @@ | |||
| // SPDX-License-Identifier: GPL-2.0
 | ||||
| #include <linux/uaccess.h> | ||||
| #include <linux/bpfilter.h> | ||||
| #include <uapi/linux/bpf.h> | ||||
| #include <linux/wait.h> | ||||
| #include <linux/kmod.h> | ||||
| 
 | ||||
| int (*bpfilter_process_sockopt)(struct sock *sk, int optname, | ||||
| 				char __user *optval, | ||||
| 				unsigned int optlen, bool is_set); | ||||
| EXPORT_SYMBOL_GPL(bpfilter_process_sockopt); | ||||
| 
 | ||||
| /* Pass a sockopt request to the registered bpfilter handler. | ||||
|  * | ||||
|  * If no handler is registered, try to load the "bpfilter" module first. | ||||
|  * Returns the request_module() error if that fails, -ECHILD if the module | ||||
|  * loaded but still registered no handler, otherwise the handler's result. | ||||
|  * | ||||
|  * Only used inside this file, hence static. | ||||
|  */ | ||||
| static int bpfilter_mbox_request(struct sock *sk, int optname, | ||||
| 				 char __user *optval, | ||||
| 				 unsigned int optlen, bool is_set) | ||||
| { | ||||
| 	if (!bpfilter_process_sockopt) { | ||||
| 		int err = request_module("bpfilter"); | ||||
| 
 | ||||
| 		if (err) | ||||
| 			return err; | ||||
| 		if (!bpfilter_process_sockopt) | ||||
| 			return -ECHILD; | ||||
| 	} | ||||
| 	return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set); | ||||
| } | ||||
| 
 | ||||
| int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, | ||||
| 			    unsigned int optlen) | ||||
| { | ||||
| 	return bpfilter_mbox_request(sk, optname, optval, optlen, true);	/* true: set request */ | ||||
| } | ||||
| 
 | ||||
| /* Forward a "get" socket option to bpfilter. | ||||
|  * | ||||
|  * Reads the buffer length from the user pointer @optlen. Returns -EFAULT if | ||||
|  * that read fails, -EINVAL for a negative length, otherwise the result of | ||||
|  * bpfilter_mbox_request(). | ||||
|  */ | ||||
| int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, | ||||
| 			    int __user *optlen) | ||||
| { | ||||
| 	int len; | ||||
| 
 | ||||
| 	if (get_user(len, optlen)) | ||||
| 		return -EFAULT; | ||||
| 	/* len is passed on as unsigned; a negative value would become huge */ | ||||
| 	if (len < 0) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	return bpfilter_mbox_request(sk, optname, optval, len, false); | ||||
| } | ||||
|  | @ -47,6 +47,8 @@ | |||
| #include <linux/errqueue.h> | ||||
| #include <linux/uaccess.h> | ||||
| 
 | ||||
| #include <linux/bpfilter.h> | ||||
| 
 | ||||
| /*
 | ||||
|  *	SOL_IP control messages. | ||||
|  */ | ||||
|  | @ -1244,6 +1246,11 @@ int ip_setsockopt(struct sock *sk, int level, | |||
| 		return -ENOPROTOOPT; | ||||
| 
 | ||||
| 	err = do_ip_setsockopt(sk, level, optname, optval, optlen); | ||||
| #ifdef CONFIG_BPFILTER | ||||
| 	if (optname >= BPFILTER_IPT_SO_SET_REPLACE && | ||||
| 	    optname < BPFILTER_IPT_SET_MAX) | ||||
| 		err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen); | ||||
| #endif | ||||
| #ifdef CONFIG_NETFILTER | ||||
| 	/* we need to exclude all possible ENOPROTOOPTs except default case */ | ||||
| 	if (err == -ENOPROTOOPT && optname != IP_HDRINCL && | ||||
|  | @ -1552,6 +1559,11 @@ int ip_getsockopt(struct sock *sk, int level, | |||
| 	int err; | ||||
| 
 | ||||
| 	err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); | ||||
| #ifdef CONFIG_BPFILTER | ||||
| 	if (optname >= BPFILTER_IPT_SO_GET_INFO && | ||||
| 	    optname < BPFILTER_IPT_GET_MAX) | ||||
| 		err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); | ||||
| #endif | ||||
| #ifdef CONFIG_NETFILTER | ||||
| 	/* we need to exclude all possible ENOPROTOOPTs except default case */ | ||||
| 	if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && | ||||
|  | @ -1584,6 +1596,11 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, | |||
| 	err = do_ip_getsockopt(sk, level, optname, optval, optlen, | ||||
| 		MSG_CMSG_COMPAT); | ||||
| 
 | ||||
| #ifdef CONFIG_BPFILTER | ||||
| 	if (optname >= BPFILTER_IPT_SO_GET_INFO && | ||||
| 	    optname < BPFILTER_IPT_GET_MAX) | ||||
| 		err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); | ||||
| #endif | ||||
| #ifdef CONFIG_NETFILTER | ||||
| 	/* we need to exclude all possible ENOPROTOOPTs except default case */ | ||||
| 	if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Alexei Starovoitov
						Alexei Starovoitov