forked from mirrors/linux
		
	veth: Rework veth_xdp_rcv_skb in order to accept non-linear skb
Introduce veth_convert_skb_to_xdp_buff routine in order to convert a non-linear skb into a xdp buffer. If the received skb is cloned or shared, veth_convert_skb_to_xdp_buff will copy it in a new skb composed by order-0 pages for the linear and the fragmented area. Moreover veth_convert_skb_to_xdp_buff guarantees we have enough headroom for xdp. This is a preliminary patch to allow attaching xdp programs with frags support on veth devices. Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Acked-by: John Fastabend <john.fastabend@gmail.com> Link: https://lore.kernel.org/bpf/8d228b106bc1903571afd1d77e797bffe9a5ea7c.1646989407.git.lorenzo@kernel.org
This commit is contained in:
		
							parent
							
								
									5142239a22
								
							
						
					
					
						commit
						718a18a0c8
					
				
					 2 changed files with 135 additions and 69 deletions
				
			
		| 
						 | 
					@ -433,21 +433,6 @@ static void veth_set_multicast_list(struct net_device *dev)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
 | 
					 | 
				
			||||||
				      int buflen)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	struct sk_buff *skb;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	skb = build_skb(head, buflen);
 | 
					 | 
				
			||||||
	if (!skb)
 | 
					 | 
				
			||||||
		return NULL;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	skb_reserve(skb, headroom);
 | 
					 | 
				
			||||||
	skb_put(skb, len);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return skb;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static int veth_select_rxq(struct net_device *dev)
 | 
					static int veth_select_rxq(struct net_device *dev)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return smp_processor_id() % dev->real_num_rx_queues;
 | 
						return smp_processor_id() % dev->real_num_rx_queues;
 | 
				
			||||||
| 
						 | 
					@ -695,16 +680,130 @@ static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void veth_xdp_get(struct xdp_buff *xdp)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						get_page(virt_to_page(xdp->data));
 | 
				
			||||||
 | 
						if (likely(!xdp_buff_has_frags(xdp)))
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (i = 0; i < sinfo->nr_frags; i++)
 | 
				
			||||||
 | 
							__skb_frag_ref(&sinfo->frags[i]);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 | 
				
			||||||
 | 
										struct xdp_buff *xdp,
 | 
				
			||||||
 | 
										struct sk_buff **pskb)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct sk_buff *skb = *pskb;
 | 
				
			||||||
 | 
						u32 frame_sz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (skb_shared(skb) || skb_head_is_locked(skb) ||
 | 
				
			||||||
 | 
						    skb_shinfo(skb)->nr_frags) {
 | 
				
			||||||
 | 
							u32 size, len, max_head_size, off;
 | 
				
			||||||
 | 
							struct sk_buff *nskb;
 | 
				
			||||||
 | 
							struct page *page;
 | 
				
			||||||
 | 
							int i, head_off;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* We need a private copy of the skb and data buffers since
 | 
				
			||||||
 | 
							 * the ebpf program can modify it. We segment the original skb
 | 
				
			||||||
 | 
							 * into order-0 pages without linearize it.
 | 
				
			||||||
 | 
							 *
 | 
				
			||||||
 | 
							 * Make sure we have enough space for linear and paged area
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE -
 | 
				
			||||||
 | 
											  VETH_XDP_HEADROOM);
 | 
				
			||||||
 | 
							if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size)
 | 
				
			||||||
 | 
								goto drop;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Allocate skb head */
 | 
				
			||||||
 | 
							page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 | 
				
			||||||
 | 
							if (!page)
 | 
				
			||||||
 | 
								goto drop;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							nskb = build_skb(page_address(page), PAGE_SIZE);
 | 
				
			||||||
 | 
							if (!nskb) {
 | 
				
			||||||
 | 
								put_page(page);
 | 
				
			||||||
 | 
								goto drop;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							skb_reserve(nskb, VETH_XDP_HEADROOM);
 | 
				
			||||||
 | 
							size = min_t(u32, skb->len, max_head_size);
 | 
				
			||||||
 | 
							if (skb_copy_bits(skb, 0, nskb->data, size)) {
 | 
				
			||||||
 | 
								consume_skb(nskb);
 | 
				
			||||||
 | 
								goto drop;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							skb_put(nskb, size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							skb_copy_header(nskb, skb);
 | 
				
			||||||
 | 
							head_off = skb_headroom(nskb) - skb_headroom(skb);
 | 
				
			||||||
 | 
							skb_headers_offset_update(nskb, head_off);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Allocate paged area of new skb */
 | 
				
			||||||
 | 
							off = size;
 | 
				
			||||||
 | 
							len = skb->len - off;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
 | 
				
			||||||
 | 
								page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 | 
				
			||||||
 | 
								if (!page) {
 | 
				
			||||||
 | 
									consume_skb(nskb);
 | 
				
			||||||
 | 
									goto drop;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								size = min_t(u32, len, PAGE_SIZE);
 | 
				
			||||||
 | 
								skb_add_rx_frag(nskb, i, page, 0, size, PAGE_SIZE);
 | 
				
			||||||
 | 
								if (skb_copy_bits(skb, off, page_address(page),
 | 
				
			||||||
 | 
										  size)) {
 | 
				
			||||||
 | 
									consume_skb(nskb);
 | 
				
			||||||
 | 
									goto drop;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								len -= size;
 | 
				
			||||||
 | 
								off += size;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							consume_skb(skb);
 | 
				
			||||||
 | 
							skb = nskb;
 | 
				
			||||||
 | 
						} else if (skb_headroom(skb) < XDP_PACKET_HEADROOM &&
 | 
				
			||||||
 | 
							   pskb_expand_head(skb, VETH_XDP_HEADROOM, 0, GFP_ATOMIC)) {
 | 
				
			||||||
 | 
							goto drop;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* SKB "head" area always have tailroom for skb_shared_info */
 | 
				
			||||||
 | 
						frame_sz = skb_end_pointer(skb) - skb->head;
 | 
				
			||||||
 | 
						frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 | 
				
			||||||
 | 
						xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
 | 
				
			||||||
 | 
						xdp_prepare_buff(xdp, skb->head, skb_headroom(skb),
 | 
				
			||||||
 | 
								 skb_headlen(skb), true);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (skb_is_nonlinear(skb)) {
 | 
				
			||||||
 | 
							skb_shinfo(skb)->xdp_frags_size = skb->data_len;
 | 
				
			||||||
 | 
							xdp_buff_set_frags_flag(xdp);
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							xdp_buff_clear_frags_flag(xdp);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						*pskb = skb;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					drop:
 | 
				
			||||||
 | 
						consume_skb(skb);
 | 
				
			||||||
 | 
						*pskb = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return -ENOMEM;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 | 
					static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 | 
				
			||||||
					struct sk_buff *skb,
 | 
										struct sk_buff *skb,
 | 
				
			||||||
					struct veth_xdp_tx_bq *bq,
 | 
										struct veth_xdp_tx_bq *bq,
 | 
				
			||||||
					struct veth_stats *stats)
 | 
										struct veth_stats *stats)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	u32 pktlen, headroom, act, metalen, frame_sz;
 | 
					 | 
				
			||||||
	void *orig_data, *orig_data_end;
 | 
						void *orig_data, *orig_data_end;
 | 
				
			||||||
	struct bpf_prog *xdp_prog;
 | 
						struct bpf_prog *xdp_prog;
 | 
				
			||||||
	int mac_len, delta, off;
 | 
					 | 
				
			||||||
	struct xdp_buff xdp;
 | 
						struct xdp_buff xdp;
 | 
				
			||||||
 | 
						u32 act, metalen;
 | 
				
			||||||
 | 
						int off;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	skb_prepare_for_gro(skb);
 | 
						skb_prepare_for_gro(skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -715,53 +814,10 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 | 
				
			||||||
		goto out;
 | 
							goto out;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mac_len = skb->data - skb_mac_header(skb);
 | 
						__skb_push(skb, skb->data - skb_mac_header(skb));
 | 
				
			||||||
	pktlen = skb->len + mac_len;
 | 
						if (veth_convert_skb_to_xdp_buff(rq, &xdp, &skb))
 | 
				
			||||||
	headroom = skb_headroom(skb) - mac_len;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (skb_shared(skb) || skb_head_is_locked(skb) ||
 | 
					 | 
				
			||||||
	    skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) {
 | 
					 | 
				
			||||||
		struct sk_buff *nskb;
 | 
					 | 
				
			||||||
		int size, head_off;
 | 
					 | 
				
			||||||
		void *head, *start;
 | 
					 | 
				
			||||||
		struct page *page;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) +
 | 
					 | 
				
			||||||
		       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 | 
					 | 
				
			||||||
		if (size > PAGE_SIZE)
 | 
					 | 
				
			||||||
		goto drop;
 | 
							goto drop;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 | 
					 | 
				
			||||||
		if (!page)
 | 
					 | 
				
			||||||
			goto drop;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		head = page_address(page);
 | 
					 | 
				
			||||||
		start = head + VETH_XDP_HEADROOM;
 | 
					 | 
				
			||||||
		if (skb_copy_bits(skb, -mac_len, start, pktlen)) {
 | 
					 | 
				
			||||||
			page_frag_free(head);
 | 
					 | 
				
			||||||
			goto drop;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		nskb = veth_build_skb(head, VETH_XDP_HEADROOM + mac_len,
 | 
					 | 
				
			||||||
				      skb->len, PAGE_SIZE);
 | 
					 | 
				
			||||||
		if (!nskb) {
 | 
					 | 
				
			||||||
			page_frag_free(head);
 | 
					 | 
				
			||||||
			goto drop;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		skb_copy_header(nskb, skb);
 | 
					 | 
				
			||||||
		head_off = skb_headroom(nskb) - skb_headroom(skb);
 | 
					 | 
				
			||||||
		skb_headers_offset_update(nskb, head_off);
 | 
					 | 
				
			||||||
		consume_skb(skb);
 | 
					 | 
				
			||||||
		skb = nskb;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* SKB "head" area always have tailroom for skb_shared_info */
 | 
					 | 
				
			||||||
	frame_sz = skb_end_pointer(skb) - skb->head;
 | 
					 | 
				
			||||||
	frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 | 
					 | 
				
			||||||
	xdp_init_buff(&xdp, frame_sz, &rq->xdp_rxq);
 | 
					 | 
				
			||||||
	xdp_prepare_buff(&xdp, skb->head, skb->mac_header, pktlen, true);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	orig_data = xdp.data;
 | 
						orig_data = xdp.data;
 | 
				
			||||||
	orig_data_end = xdp.data_end;
 | 
						orig_data_end = xdp.data_end;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -771,7 +827,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 | 
				
			||||||
	case XDP_PASS:
 | 
						case XDP_PASS:
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
	case XDP_TX:
 | 
						case XDP_TX:
 | 
				
			||||||
		get_page(virt_to_page(xdp.data));
 | 
							veth_xdp_get(&xdp);
 | 
				
			||||||
		consume_skb(skb);
 | 
							consume_skb(skb);
 | 
				
			||||||
		xdp.rxq->mem = rq->xdp_mem;
 | 
							xdp.rxq->mem = rq->xdp_mem;
 | 
				
			||||||
		if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
 | 
							if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
 | 
				
			||||||
| 
						 | 
					@ -783,7 +839,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 | 
				
			||||||
		rcu_read_unlock();
 | 
							rcu_read_unlock();
 | 
				
			||||||
		goto xdp_xmit;
 | 
							goto xdp_xmit;
 | 
				
			||||||
	case XDP_REDIRECT:
 | 
						case XDP_REDIRECT:
 | 
				
			||||||
		get_page(virt_to_page(xdp.data));
 | 
							veth_xdp_get(&xdp);
 | 
				
			||||||
		consume_skb(skb);
 | 
							consume_skb(skb);
 | 
				
			||||||
		xdp.rxq->mem = rq->xdp_mem;
 | 
							xdp.rxq->mem = rq->xdp_mem;
 | 
				
			||||||
		if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
 | 
							if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
 | 
				
			||||||
| 
						 | 
					@ -806,18 +862,27 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 | 
				
			||||||
	rcu_read_unlock();
 | 
						rcu_read_unlock();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* check if bpf_xdp_adjust_head was used */
 | 
						/* check if bpf_xdp_adjust_head was used */
 | 
				
			||||||
	delta = orig_data - xdp.data;
 | 
						off = orig_data - xdp.data;
 | 
				
			||||||
	off = mac_len + delta;
 | 
					 | 
				
			||||||
	if (off > 0)
 | 
						if (off > 0)
 | 
				
			||||||
		__skb_push(skb, off);
 | 
							__skb_push(skb, off);
 | 
				
			||||||
	else if (off < 0)
 | 
						else if (off < 0)
 | 
				
			||||||
		__skb_pull(skb, -off);
 | 
							__skb_pull(skb, -off);
 | 
				
			||||||
	skb->mac_header -= delta;
 | 
					
 | 
				
			||||||
 | 
						skb_reset_mac_header(skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* check if bpf_xdp_adjust_tail was used */
 | 
						/* check if bpf_xdp_adjust_tail was used */
 | 
				
			||||||
	off = xdp.data_end - orig_data_end;
 | 
						off = xdp.data_end - orig_data_end;
 | 
				
			||||||
	if (off != 0)
 | 
						if (off != 0)
 | 
				
			||||||
		__skb_put(skb, off); /* positive on grow, negative on shrink */
 | 
							__skb_put(skb, off); /* positive on grow, negative on shrink */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers
 | 
				
			||||||
 | 
						 * (e.g. bpf_xdp_adjust_tail), we need to update data_len here.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (xdp_buff_has_frags(&xdp))
 | 
				
			||||||
 | 
							skb->data_len = skb_shinfo(skb)->xdp_frags_size;
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							skb->data_len = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	skb->protocol = eth_type_trans(skb, rq->dev);
 | 
						skb->protocol = eth_type_trans(skb, rq->dev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	metalen = xdp.data - xdp.data_meta;
 | 
						metalen = xdp.data - xdp.data_meta;
 | 
				
			||||||
| 
						 | 
					@ -833,7 +898,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 | 
				
			||||||
	return NULL;
 | 
						return NULL;
 | 
				
			||||||
err_xdp:
 | 
					err_xdp:
 | 
				
			||||||
	rcu_read_unlock();
 | 
						rcu_read_unlock();
 | 
				
			||||||
	page_frag_free(xdp.data);
 | 
						xdp_return_buff(&xdp);
 | 
				
			||||||
xdp_xmit:
 | 
					xdp_xmit:
 | 
				
			||||||
	return NULL;
 | 
						return NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -528,6 +528,7 @@ void xdp_return_buff(struct xdp_buff *xdp)
 | 
				
			||||||
out:
 | 
					out:
 | 
				
			||||||
	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
 | 
						__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(xdp_return_buff);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
 | 
					/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
 | 
				
			||||||
void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
 | 
					void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue