forked from mirrors/linux
		
	bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c
This patch builds on top of the previous patch in the patchset,
which added BPF_LWT_ENCAP_IP mode to bpf_lwt_push_encap. As the
encapping can result in the skb needing to go via a different
interface/route/dst, bpf programs can indicate this by returning
BPF_LWT_REROUTE, which triggers a new route lookup for the skb.

v8 changes: fix kbuild errors when LWTUNNEL_BPF is builtin, but
   IPV6 is a module: as LWTUNNEL_BPF can only be either Y or N,
   call IPV6 routing functions only if they are built-in.

v9 changes:
   - fixed a kbuild test robot compiler warning;
   - call IPV6 routing functions via ipv6_stub.

v10 changes: removed unnecessary IS_ENABLED and pr_warn_once.

v11 changes: fixed a potential dst leak.

Signed-off-by: Peter Oskolkov <posk@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
		
							parent
							
								
									9b0a6a9dba
								
							
						
					
					
						commit
						3bd0b15281
					
				
					 1 changed files with 124 additions and 2 deletions
				
			
		| 
						 | 
					@ -17,6 +17,7 @@
 | 
				
			||||||
#include <linux/bpf.h>
 | 
					#include <linux/bpf.h>
 | 
				
			||||||
#include <net/lwtunnel.h>
 | 
					#include <net/lwtunnel.h>
 | 
				
			||||||
#include <net/gre.h>
 | 
					#include <net/gre.h>
 | 
				
			||||||
 | 
					#include <net/ip6_route.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct bpf_lwt_prog {
 | 
					struct bpf_lwt_prog {
 | 
				
			||||||
	struct bpf_prog *prog;
 | 
						struct bpf_prog *prog;
 | 
				
			||||||
| 
						 | 
					@ -56,6 +57,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	switch (ret) {
 | 
						switch (ret) {
 | 
				
			||||||
	case BPF_OK:
 | 
						case BPF_OK:
 | 
				
			||||||
 | 
						case BPF_LWT_REROUTE:
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	case BPF_REDIRECT:
 | 
						case BPF_REDIRECT:
 | 
				
			||||||
| 
						 | 
					@ -88,6 +90,30 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
 | 
				
			||||||
	return ret;
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int bpf_lwt_input_reroute(struct sk_buff *skb)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int err = -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (skb->protocol == htons(ETH_P_IP)) {
 | 
				
			||||||
 | 
							struct iphdr *iph = ip_hdr(skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
 | 
				
			||||||
 | 
										   iph->tos, skb_dst(skb)->dev);
 | 
				
			||||||
 | 
						} else if (skb->protocol == htons(ETH_P_IPV6)) {
 | 
				
			||||||
 | 
							err = ipv6_stub->ipv6_route_input(skb);
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							err = -EAFNOSUPPORT;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (err)
 | 
				
			||||||
 | 
							goto err;
 | 
				
			||||||
 | 
						return dst_input(skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					err:
 | 
				
			||||||
 | 
						kfree_skb(skb);
 | 
				
			||||||
 | 
						return err;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int bpf_input(struct sk_buff *skb)
 | 
					static int bpf_input(struct sk_buff *skb)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct dst_entry *dst = skb_dst(skb);
 | 
						struct dst_entry *dst = skb_dst(skb);
 | 
				
			||||||
| 
						 | 
					@ -99,11 +125,11 @@ static int bpf_input(struct sk_buff *skb)
 | 
				
			||||||
		ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
 | 
							ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
 | 
				
			||||||
		if (ret < 0)
 | 
							if (ret < 0)
 | 
				
			||||||
			return ret;
 | 
								return ret;
 | 
				
			||||||
 | 
							if (ret == BPF_LWT_REROUTE)
 | 
				
			||||||
 | 
								return bpf_lwt_input_reroute(skb);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (unlikely(!dst->lwtstate->orig_input)) {
 | 
						if (unlikely(!dst->lwtstate->orig_input)) {
 | 
				
			||||||
		pr_warn_once("orig_input not set on dst for prog %s\n",
 | 
					 | 
				
			||||||
			     bpf->out.name);
 | 
					 | 
				
			||||||
		kfree_skb(skb);
 | 
							kfree_skb(skb);
 | 
				
			||||||
		return -EINVAL;
 | 
							return -EINVAL;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -148,6 +174,91 @@ static int xmit_check_hhlen(struct sk_buff *skb)
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
 | 
				
			||||||
 | 
						int oif = l3mdev ? l3mdev->ifindex : 0;
 | 
				
			||||||
 | 
						struct dst_entry *dst = NULL;
 | 
				
			||||||
 | 
						struct sock *sk;
 | 
				
			||||||
 | 
						struct net *net;
 | 
				
			||||||
 | 
						bool ipv4;
 | 
				
			||||||
 | 
						int err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (skb->protocol == htons(ETH_P_IP))
 | 
				
			||||||
 | 
							ipv4 = true;
 | 
				
			||||||
 | 
						else if (skb->protocol == htons(ETH_P_IPV6))
 | 
				
			||||||
 | 
							ipv4 = false;
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							return -EAFNOSUPPORT;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sk = sk_to_full_sk(skb->sk);
 | 
				
			||||||
 | 
						if (sk) {
 | 
				
			||||||
 | 
							if (sk->sk_bound_dev_if)
 | 
				
			||||||
 | 
								oif = sk->sk_bound_dev_if;
 | 
				
			||||||
 | 
							net = sock_net(sk);
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							net = dev_net(skb_dst(skb)->dev);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (ipv4) {
 | 
				
			||||||
 | 
							struct iphdr *iph = ip_hdr(skb);
 | 
				
			||||||
 | 
							struct flowi4 fl4 = {};
 | 
				
			||||||
 | 
							struct rtable *rt;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							fl4.flowi4_oif = oif;
 | 
				
			||||||
 | 
							fl4.flowi4_mark = skb->mark;
 | 
				
			||||||
 | 
							fl4.flowi4_uid = sock_net_uid(net, sk);
 | 
				
			||||||
 | 
							fl4.flowi4_tos = RT_TOS(iph->tos);
 | 
				
			||||||
 | 
							fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
 | 
				
			||||||
 | 
							fl4.flowi4_proto = iph->protocol;
 | 
				
			||||||
 | 
							fl4.daddr = iph->daddr;
 | 
				
			||||||
 | 
							fl4.saddr = iph->saddr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							rt = ip_route_output_key(net, &fl4);
 | 
				
			||||||
 | 
							if (IS_ERR(rt))
 | 
				
			||||||
 | 
								return -EINVAL;
 | 
				
			||||||
 | 
							dst = &rt->dst;
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							struct ipv6hdr *iph6 = ipv6_hdr(skb);
 | 
				
			||||||
 | 
							struct flowi6 fl6 = {};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							fl6.flowi6_oif = oif;
 | 
				
			||||||
 | 
							fl6.flowi6_mark = skb->mark;
 | 
				
			||||||
 | 
							fl6.flowi6_uid = sock_net_uid(net, sk);
 | 
				
			||||||
 | 
							fl6.flowlabel = ip6_flowinfo(iph6);
 | 
				
			||||||
 | 
							fl6.flowi6_proto = iph6->nexthdr;
 | 
				
			||||||
 | 
							fl6.daddr = iph6->daddr;
 | 
				
			||||||
 | 
							fl6.saddr = iph6->saddr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
 | 
				
			||||||
 | 
							if (err || IS_ERR(dst))
 | 
				
			||||||
 | 
								return -EINVAL;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if (unlikely(dst->error)) {
 | 
				
			||||||
 | 
							dst_release(dst);
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
 | 
				
			||||||
 | 
						 * was done for the previous dst, so we are doing it here again, in
 | 
				
			||||||
 | 
						 * case the new dst needs much more space. The call below is a noop
 | 
				
			||||||
 | 
						 * if there is enough header space in skb.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 | 
				
			||||||
 | 
						if (unlikely(err))
 | 
				
			||||||
 | 
							return err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						skb_dst_drop(skb);
 | 
				
			||||||
 | 
						skb_dst_set(skb, dst);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
 | 
				
			||||||
 | 
						if (unlikely(err))
 | 
				
			||||||
 | 
							return err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
 | 
				
			||||||
 | 
						return LWTUNNEL_XMIT_DONE;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int bpf_xmit(struct sk_buff *skb)
 | 
					static int bpf_xmit(struct sk_buff *skb)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct dst_entry *dst = skb_dst(skb);
 | 
						struct dst_entry *dst = skb_dst(skb);
 | 
				
			||||||
| 
						 | 
					@ -155,11 +266,20 @@ static int bpf_xmit(struct sk_buff *skb)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
 | 
						bpf = bpf_lwt_lwtunnel(dst->lwtstate);
 | 
				
			||||||
	if (bpf->xmit.prog) {
 | 
						if (bpf->xmit.prog) {
 | 
				
			||||||
 | 
							__be16 proto = skb->protocol;
 | 
				
			||||||
		int ret;
 | 
							int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
 | 
							ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
 | 
				
			||||||
		switch (ret) {
 | 
							switch (ret) {
 | 
				
			||||||
		case BPF_OK:
 | 
							case BPF_OK:
 | 
				
			||||||
 | 
								/* If the header changed, e.g. via bpf_lwt_push_encap,
 | 
				
			||||||
 | 
								 * BPF_LWT_REROUTE below should have been used if the
 | 
				
			||||||
 | 
								 * protocol was also changed.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								if (skb->protocol != proto) {
 | 
				
			||||||
 | 
									kfree_skb(skb);
 | 
				
			||||||
 | 
									return -EINVAL;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
			/* If the header was expanded, headroom might be too
 | 
								/* If the header was expanded, headroom might be too
 | 
				
			||||||
			 * small for L2 header to come, expand as needed.
 | 
								 * small for L2 header to come, expand as needed.
 | 
				
			||||||
			 */
 | 
								 */
 | 
				
			||||||
| 
						 | 
					@ -170,6 +290,8 @@ static int bpf_xmit(struct sk_buff *skb)
 | 
				
			||||||
			return LWTUNNEL_XMIT_CONTINUE;
 | 
								return LWTUNNEL_XMIT_CONTINUE;
 | 
				
			||||||
		case BPF_REDIRECT:
 | 
							case BPF_REDIRECT:
 | 
				
			||||||
			return LWTUNNEL_XMIT_DONE;
 | 
								return LWTUNNEL_XMIT_DONE;
 | 
				
			||||||
 | 
							case BPF_LWT_REROUTE:
 | 
				
			||||||
 | 
								return bpf_lwt_xmit_reroute(skb);
 | 
				
			||||||
		default:
 | 
							default:
 | 
				
			||||||
			return ret;
 | 
								return ret;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue