bpf: add bpf_redirect() helper

The existing bpf_clone_redirect() helper clones the skb before redirecting
it to RX or TX of the destination netdev.
Introduce a bpf_redirect() helper that does the same without cloning.

Benchmarked with two hosts using 10G ixgbe NICs.
One host is doing line rate pktgen.
The other host is configured as:

$ tc qdisc add dev $dev ingress
$ tc filter add dev $dev root pref 10 u32 match u32 0 0 flowid 1:2 \
  action bpf run object-file tcbpf1_kern.o section clone_redirect_xmit drop

so it receives the packet on $dev and immediately xmits it on $dev + 1.
The section 'clone_redirect_xmit' in the tcbpf1_kern.o file contains the
program that does bpf_clone_redirect(); performance is 2.0 Mpps.

$ tc filter add dev $dev root pref 10 u32 match u32 0 0 flowid 1:2 \
  action bpf run object-file tcbpf1_kern.o section redirect_xmit drop

which uses bpf_redirect() - 2.4 Mpps.

Using cls_bpf with integrated actions as:

$ tc filter add dev $dev root pref 10 \
  bpf run object-file tcbpf1_kern.o section redirect_xmit integ_act classid 1

performance is 2.5 Mpps.

To summarize:
  u32 + act_bpf using clone_redirect - 2.0 Mpps
  u32 + act_bpf using redirect       - 2.4 Mpps
  cls_bpf using redirect             - 2.5 Mpps

For comparison, the Linux bridge in this setup does 2.1 Mpps,
and ixgbe rx + drop in ip_rcv does 7.8 Mpps.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 045efa82ff
commit 27b29f6305
9 changed files with 91 additions and 1 deletion
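For reference, the integrated-actions path benchmarked above boils down to a cls_bpf program that returns the helper's result directly, so no separate act_bpf instance is needed. Below is a minimal standalone sketch mirroring the redirect_xmit / redirect_recv sections this commit adds to samples/bpf/tcbpf1_kern.c; the ifindex + 1 target and the flag value 1 (bit 0 = redirect to ingress) are taken from the sample, not mandated by the helper:

/* Minimal cls_bpf sketch using the new bpf_redirect() helper.
 * flags bit 0 selects ingress vs egress of the target netdev;
 * here the "next" ifindex is used, as in the benchmark sample.
 */
#include <uapi/linux/bpf.h>
#include <uapi/linux/pkt_cls.h>
#include "bpf_helpers.h"

SEC("redirect_xmit")
int _redirect_xmit(struct __sk_buff *skb)
{
	/* redirect to egress of ifindex + 1 without cloning the skb */
	return bpf_redirect(skb->ifindex + 1, 0);
}

SEC("redirect_recv")
int _redirect_recv(struct __sk_buff *skb)
{
	/* bit 0 set: deliver to the ingress path of ifindex + 1 instead */
	return bpf_redirect(skb->ifindex + 1, 1);
}

char _license[] SEC("license") = "GPL";

With cls_bpf and integrated actions, the helper's TC_ACT_REDIRECT return value is acted on directly by the ingress hook; with u32 + act_bpf, act_bpf passes the result through (see the act_bpf hunk in the diff below).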
				
			
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -402,6 +402,7 @@ void __qdisc_calculate_pkt_len(struct sk_buff *skb,
 			       const struct qdisc_size_table *stab);
 bool tcf_destroy(struct tcf_proto *tp, bool force);
 void tcf_destroy_chain(struct tcf_proto __rcu **fl);
+int skb_do_redirect(struct sk_buff *);
 
 /* Reset all TX qdiscs greater then index of a device.  */
 static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -272,6 +272,14 @@ enum bpf_func_id {
 	BPF_FUNC_skb_get_tunnel_key,
 	BPF_FUNC_skb_set_tunnel_key,
 	BPF_FUNC_perf_event_read,	/* u64 bpf_perf_event_read(&map, index) */
+	/**
+	 * bpf_redirect(ifindex, flags) - redirect to another netdev
+	 * @ifindex: ifindex of the net device
+	 * @flags: bit 0 - if set, redirect to ingress instead of egress
+	 *         other bits - reserved
+	 * Return: TC_ACT_REDIRECT
+	 */
+	BPF_FUNC_redirect,
 	__BPF_FUNC_MAX_ID,
 };
 
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -87,6 +87,7 @@ enum {
 #define TC_ACT_STOLEN		4
 #define TC_ACT_QUEUED		5
 #define TC_ACT_REPEAT		6
+#define TC_ACT_REDIRECT		7
 #define TC_ACT_JUMP		0x10000000
 
 /* Action type identifiers*/
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3670,6 +3670,14 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 	case TC_ACT_QUEUED:
 		kfree_skb(skb);
 		return NULL;
+	case TC_ACT_REDIRECT:
+		/* skb_mac_header check was done by cls/act_bpf, so
+		 * we can safely push the L2 header back before
+		 * redirecting to another netdev
+		 */
+		__skb_push(skb, skb->mac_len);
+		skb_do_redirect(skb);
+		return NULL;
 	default:
 		break;
 	}
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1427,6 +1427,48 @@ const struct bpf_func_proto bpf_clone_redirect_proto = {
 	.arg3_type      = ARG_ANYTHING,
 };
 
+struct redirect_info {
+	u32 ifindex;
+	u32 flags;
+};
+
+static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+	ri->ifindex = ifindex;
+	ri->flags = flags;
+	return TC_ACT_REDIRECT;
+}
+
+int skb_do_redirect(struct sk_buff *skb)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct net_device *dev;
+
+	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
+	ri->ifindex = 0;
+	if (unlikely(!dev)) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	if (BPF_IS_REDIRECT_INGRESS(ri->flags))
+		return dev_forward_skb(dev, skb);
+
+	skb->dev = dev;
+	return dev_queue_xmit(skb);
+}
+
+const struct bpf_func_proto bpf_redirect_proto = {
+	.func           = bpf_redirect,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_ANYTHING,
+	.arg2_type      = ARG_ANYTHING,
+};
+
 static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 {
 	return task_get_classid((struct sk_buff *) (unsigned long) r1);
@@ -1607,6 +1649,8 @@ tc_cls_act_func_proto(enum bpf_func_id)
 		return &bpf_skb_get_tunnel_key_proto;
 	case BPF_FUNC_skb_set_tunnel_key:
 		return bpf_get_skb_set_tunnel_key_proto();
+	case BPF_FUNC_redirect:
+		return &bpf_redirect_proto;
 	default:
 		return sk_filter_func_proto(func_id);
 	}
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -72,6 +72,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 	case TC_ACT_PIPE:
 	case TC_ACT_RECLASSIFY:
 	case TC_ACT_OK:
+	case TC_ACT_REDIRECT:
 		action = filter_res;
 		break;
 	case TC_ACT_SHOT:
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -70,6 +70,7 @@ static int cls_bpf_exec_opcode(int code)
 	case TC_ACT_PIPE:
 	case TC_ACT_STOLEN:
 	case TC_ACT_QUEUED:
+	case TC_ACT_REDIRECT:
 	case TC_ACT_UNSPEC:
 		return code;
 	default:
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -33,6 +33,10 @@ static int (*bpf_get_current_comm)(void *buf, int buf_size) =
 	(void *) BPF_FUNC_get_current_comm;
 static int (*bpf_perf_event_read)(void *map, int index) =
 	(void *) BPF_FUNC_perf_event_read;
+static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
+	(void *) BPF_FUNC_clone_redirect;
+static int (*bpf_redirect)(int ifindex, int flags) =
+	(void *) BPF_FUNC_redirect;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
--- a/samples/bpf/tcbpf1_kern.c
+++ b/samples/bpf/tcbpf1_kern.c
@@ -5,7 +5,7 @@
 #include <uapi/linux/in.h>
 #include <uapi/linux/tcp.h>
 #include <uapi/linux/filter.h>
-
+#include <uapi/linux/pkt_cls.h>
 #include "bpf_helpers.h"
 
 /* compiler workaround */
@@ -64,4 +64,26 @@ int bpf_prog1(struct __sk_buff *skb)
 
 	return 0;
 }
+SEC("redirect_xmit")
+int _redirect_xmit(struct __sk_buff *skb)
+{
+	return bpf_redirect(skb->ifindex + 1, 0);
+}
+SEC("redirect_recv")
+int _redirect_recv(struct __sk_buff *skb)
+{
+	return bpf_redirect(skb->ifindex + 1, 1);
+}
+SEC("clone_redirect_xmit")
+int _clone_redirect_xmit(struct __sk_buff *skb)
+{
+	bpf_clone_redirect(skb, skb->ifindex + 1, 0);
+	return TC_ACT_SHOT;
+}
+SEC("clone_redirect_recv")
+int _clone_redirect_recv(struct __sk_buff *skb)
+{
+	bpf_clone_redirect(skb, skb->ifindex + 1, 1);
+	return TC_ACT_SHOT;
+}
 char _license[] SEC("license") = "GPL";