forked from mirrors/linux
		
	netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link
This commit adds support for enabling IP defrag using pre-existing netfilter defrag support. Basically all the flag does is bump a refcnt while the link is active. Checks are also added to ensure the prog requesting defrag support is run _after_ netfilter defrag hooks. We also take care to avoid any issues w.r.t. module unloading -- while defrag is active on a link, the module is prevented from unloading. Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> Reviewed-by: Florian Westphal <fw@strlen.de> Link: https://lore.kernel.org/r/5cff26f97e55161b7d56b09ddcf5f8888a5add1d.1689970773.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
		
							parent
							
								
									9abddac583
								
							
						
					
					
						commit
						91721c2d02
					
				
					 3 changed files with 118 additions and 15 deletions
				
			
		| 
						 | 
					@ -1188,6 +1188,11 @@ enum bpf_perf_event_type {
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#define BPF_F_KPROBE_MULTI_RETURN	(1U << 0)
 | 
					#define BPF_F_KPROBE_MULTI_RETURN	(1U << 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* link_create.netfilter.flags used in LINK_CREATE command for
 | 
				
			||||||
 | 
					 * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
 | 
					/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
 | 
				
			||||||
 * the following extensions:
 | 
					 * the following extensions:
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,6 +1,8 @@
 | 
				
			||||||
// SPDX-License-Identifier: GPL-2.0
 | 
					// SPDX-License-Identifier: GPL-2.0
 | 
				
			||||||
#include <linux/bpf.h>
 | 
					#include <linux/bpf.h>
 | 
				
			||||||
#include <linux/filter.h>
 | 
					#include <linux/filter.h>
 | 
				
			||||||
 | 
					#include <linux/kmod.h>
 | 
				
			||||||
 | 
					#include <linux/module.h>
 | 
				
			||||||
#include <linux/netfilter.h>
 | 
					#include <linux/netfilter.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <net/netfilter/nf_bpf_link.h>
 | 
					#include <net/netfilter/nf_bpf_link.h>
 | 
				
			||||||
| 
						 | 
					@ -23,8 +25,88 @@ struct bpf_nf_link {
 | 
				
			||||||
	struct nf_hook_ops hook_ops;
 | 
						struct nf_hook_ops hook_ops;
 | 
				
			||||||
	struct net *net;
 | 
						struct net *net;
 | 
				
			||||||
	u32 dead;
 | 
						u32 dead;
 | 
				
			||||||
 | 
						const struct nf_defrag_hook *defrag_hook;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static const struct nf_defrag_hook *
 | 
				
			||||||
 | 
					get_proto_defrag_hook(struct bpf_nf_link *link,
 | 
				
			||||||
 | 
							      const struct nf_defrag_hook __rcu *global_hook,
 | 
				
			||||||
 | 
							      const char *mod)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						const struct nf_defrag_hook *hook;
 | 
				
			||||||
 | 
						int err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* RCU protects us from races against module unloading */
 | 
				
			||||||
 | 
						rcu_read_lock();
 | 
				
			||||||
 | 
						hook = rcu_dereference(global_hook);
 | 
				
			||||||
 | 
						if (!hook) {
 | 
				
			||||||
 | 
							rcu_read_unlock();
 | 
				
			||||||
 | 
							err = request_module(mod);
 | 
				
			||||||
 | 
							if (err)
 | 
				
			||||||
 | 
								return ERR_PTR(err < 0 ? err : -EINVAL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							rcu_read_lock();
 | 
				
			||||||
 | 
							hook = rcu_dereference(global_hook);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (hook && try_module_get(hook->owner)) {
 | 
				
			||||||
 | 
							/* Once we have a refcnt on the module, we no longer need RCU */
 | 
				
			||||||
 | 
							hook = rcu_pointer_handoff(hook);
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							WARN_ONCE(!hook, "%s has bad registration", mod);
 | 
				
			||||||
 | 
							hook = ERR_PTR(-ENOENT);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						rcu_read_unlock();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!IS_ERR(hook)) {
 | 
				
			||||||
 | 
							err = hook->enable(link->net);
 | 
				
			||||||
 | 
							if (err) {
 | 
				
			||||||
 | 
								module_put(hook->owner);
 | 
				
			||||||
 | 
								hook = ERR_PTR(err);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return hook;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						const struct nf_defrag_hook __maybe_unused *hook;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						switch (link->hook_ops.pf) {
 | 
				
			||||||
 | 
					#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
 | 
				
			||||||
 | 
						case NFPROTO_IPV4:
 | 
				
			||||||
 | 
							hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4");
 | 
				
			||||||
 | 
							if (IS_ERR(hook))
 | 
				
			||||||
 | 
								return PTR_ERR(hook);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							link->defrag_hook = hook;
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 | 
				
			||||||
 | 
						case NFPROTO_IPV6:
 | 
				
			||||||
 | 
							hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6");
 | 
				
			||||||
 | 
							if (IS_ERR(hook))
 | 
				
			||||||
 | 
								return PTR_ERR(hook);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							link->defrag_hook = hook;
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
						default:
 | 
				
			||||||
 | 
							return -EAFNOSUPPORT;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void bpf_nf_disable_defrag(struct bpf_nf_link *link)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						const struct nf_defrag_hook *hook = link->defrag_hook;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!hook)
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						hook->disable(link->net);
 | 
				
			||||||
 | 
						module_put(hook->owner);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void bpf_nf_link_release(struct bpf_link *link)
 | 
					static void bpf_nf_link_release(struct bpf_link *link)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
 | 
						struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
 | 
				
			||||||
| 
						 | 
					@ -32,11 +114,11 @@ static void bpf_nf_link_release(struct bpf_link *link)
 | 
				
			||||||
	if (nf_link->dead)
 | 
						if (nf_link->dead)
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* prevent hook-not-found warning splat from netfilter core when
 | 
						/* do not double release in case .detach was already called */
 | 
				
			||||||
	 * .detach was already called
 | 
						if (!cmpxchg(&nf_link->dead, 0, 1)) {
 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	if (!cmpxchg(&nf_link->dead, 0, 1))
 | 
					 | 
				
			||||||
		nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
 | 
							nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
 | 
				
			||||||
 | 
							bpf_nf_disable_defrag(nf_link);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void bpf_nf_link_dealloc(struct bpf_link *link)
 | 
					static void bpf_nf_link_dealloc(struct bpf_link *link)
 | 
				
			||||||
| 
						 | 
					@ -92,6 +174,8 @@ static const struct bpf_link_ops bpf_nf_link_lops = {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
 | 
					static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						int prio;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	switch (attr->link_create.netfilter.pf) {
 | 
						switch (attr->link_create.netfilter.pf) {
 | 
				
			||||||
	case NFPROTO_IPV4:
 | 
						case NFPROTO_IPV4:
 | 
				
			||||||
	case NFPROTO_IPV6:
 | 
						case NFPROTO_IPV6:
 | 
				
			||||||
| 
						 | 
					@ -102,19 +186,18 @@ static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
 | 
				
			||||||
		return -EAFNOSUPPORT;
 | 
							return -EAFNOSUPPORT;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (attr->link_create.netfilter.flags)
 | 
						if (attr->link_create.netfilter.flags & ~BPF_F_NETFILTER_IP_DEFRAG)
 | 
				
			||||||
		return -EOPNOTSUPP;
 | 
							return -EOPNOTSUPP;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* make sure conntrack confirm is always last.
 | 
						/* make sure conntrack confirm is always last */
 | 
				
			||||||
	 *
 | 
						prio = attr->link_create.netfilter.priority;
 | 
				
			||||||
	 * In the future, if userspace can e.g. request defrag, then
 | 
						if (prio == NF_IP_PRI_FIRST)
 | 
				
			||||||
	 * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG"
 | 
							return -ERANGE;  /* sabotage_in and other warts */
 | 
				
			||||||
	 * should fail.
 | 
						else if (prio == NF_IP_PRI_LAST)
 | 
				
			||||||
	 */
 | 
							return -ERANGE;  /* e.g. conntrack confirm */
 | 
				
			||||||
	switch (attr->link_create.netfilter.priority) {
 | 
						else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) &&
 | 
				
			||||||
	case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */
 | 
							 prio <= NF_IP_PRI_CONNTRACK_DEFRAG)
 | 
				
			||||||
	case NF_IP_PRI_LAST: return -ERANGE; /* e.g. conntrack confirm */
 | 
							return -ERANGE;  /* cannot use defrag if prog runs before nf_defrag */
 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -149,6 +232,7 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	link->net = net;
 | 
						link->net = net;
 | 
				
			||||||
	link->dead = false;
 | 
						link->dead = false;
 | 
				
			||||||
 | 
						link->defrag_hook = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	err = bpf_link_prime(&link->link, &link_primer);
 | 
						err = bpf_link_prime(&link->link, &link_primer);
 | 
				
			||||||
	if (err) {
 | 
						if (err) {
 | 
				
			||||||
| 
						 | 
					@ -156,8 +240,17 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 | 
				
			||||||
		return err;
 | 
							return err;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) {
 | 
				
			||||||
 | 
							err = bpf_nf_enable_defrag(link);
 | 
				
			||||||
 | 
							if (err) {
 | 
				
			||||||
 | 
								bpf_link_cleanup(&link_primer);
 | 
				
			||||||
 | 
								return err;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	err = nf_register_net_hook(net, &link->hook_ops);
 | 
						err = nf_register_net_hook(net, &link->hook_ops);
 | 
				
			||||||
	if (err) {
 | 
						if (err) {
 | 
				
			||||||
 | 
							bpf_nf_disable_defrag(link);
 | 
				
			||||||
		bpf_link_cleanup(&link_primer);
 | 
							bpf_link_cleanup(&link_primer);
 | 
				
			||||||
		return err;
 | 
							return err;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1188,6 +1188,11 @@ enum bpf_perf_event_type {
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#define BPF_F_KPROBE_MULTI_RETURN	(1U << 0)
 | 
					#define BPF_F_KPROBE_MULTI_RETURN	(1U << 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* link_create.netfilter.flags used in LINK_CREATE command for
 | 
				
			||||||
 | 
					 * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
 | 
					/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
 | 
				
			||||||
 * the following extensions:
 | 
					 * the following extensions:
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue