mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	tap: use build_skb() for small packet
We use tun_alloc_skb() which calls sock_alloc_send_pskb() to allocate skb in the past. This socket based method is not suitable for high speed userspace like virtualization which usually: - ignore sk_sndbuf (INT_MAX) and expect to receive the packet as fast as possible - don't want to be block at sendmsg() To eliminate the above overheads, this patch tries to use build_skb() for small packet. We will do this only when the following conditions are all met: - TAP instead of TUN - sk_sndbuf is INT_MAX - caller don't want to be blocked - zerocopy is not used - packet size is smaller enough to use build_skb() Pktgen from guest to host shows ~11% improvement for rx pps of tap: Before: ~1.70Mpps After : ~1.88Mpps What's more important, this makes it possible to implement XDP for tap before creating skbs. Signed-off-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									d0225784be
								
							
						
					
					
						commit
						66ccbc9c87
					
				
					 1 changed files with 91 additions and 21 deletions
				
			
		| 
						 | 
				
			
			@ -105,6 +105,8 @@ do {								\
 | 
			
		|||
} while (0)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
 | 
			
		||||
 | 
			
		||||
/* TUN device flags */
 | 
			
		||||
 | 
			
		||||
/* IFF_ATTACH_QUEUE is never stored in device flags,
 | 
			
		||||
| 
						 | 
				
			
			@ -170,6 +172,7 @@ struct tun_file {
 | 
			
		|||
	struct list_head next;
 | 
			
		||||
	struct tun_struct *detached;
 | 
			
		||||
	struct skb_array tx_array;
 | 
			
		||||
	struct page_frag alloc_frag;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct tun_flow_entry {
 | 
			
		||||
| 
						 | 
				
			
			@ -571,6 +574,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 | 
			
		|||
		}
 | 
			
		||||
		if (tun)
 | 
			
		||||
			skb_array_cleanup(&tfile->tx_array);
 | 
			
		||||
		if (tfile->alloc_frag.page)
 | 
			
		||||
			put_page(tfile->alloc_frag.page);
 | 
			
		||||
		sock_put(&tfile->sk);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1190,6 +1195,61 @@ static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile,
 | 
			
		|||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
 | 
			
		||||
			      int len, int noblock, bool zerocopy)
 | 
			
		||||
{
 | 
			
		||||
	if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	if (tfile->socket.sk->sk_sndbuf != INT_MAX)
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	if (!noblock)
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	if (zerocopy)
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
 | 
			
		||||
	    SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct sk_buff *tun_build_skb(struct tun_file *tfile,
 | 
			
		||||
				     struct iov_iter *from,
 | 
			
		||||
				     int len)
 | 
			
		||||
{
 | 
			
		||||
	struct page_frag *alloc_frag = &tfile->alloc_frag;
 | 
			
		||||
	struct sk_buff *skb;
 | 
			
		||||
	int buflen = SKB_DATA_ALIGN(len + TUN_RX_PAD) +
 | 
			
		||||
		     SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 | 
			
		||||
	char *buf;
 | 
			
		||||
	size_t copied;
 | 
			
		||||
 | 
			
		||||
	if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
 | 
			
		||||
		return ERR_PTR(-ENOMEM);
 | 
			
		||||
 | 
			
		||||
	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
 | 
			
		||||
	copied = copy_page_from_iter(alloc_frag->page,
 | 
			
		||||
				     alloc_frag->offset + TUN_RX_PAD,
 | 
			
		||||
				     len, from);
 | 
			
		||||
	if (copied != len)
 | 
			
		||||
		return ERR_PTR(-EFAULT);
 | 
			
		||||
 | 
			
		||||
	skb = build_skb(buf, buflen);
 | 
			
		||||
	if (!skb)
 | 
			
		||||
		return ERR_PTR(-ENOMEM);
 | 
			
		||||
 | 
			
		||||
	skb_reserve(skb, TUN_RX_PAD);
 | 
			
		||||
	skb_put(skb, len);
 | 
			
		||||
	get_page(alloc_frag->page);
 | 
			
		||||
	alloc_frag->offset += buflen;
 | 
			
		||||
 | 
			
		||||
	return skb;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Get packet from user space buffer */
 | 
			
		||||
static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 | 
			
		||||
			    void *msg_control, struct iov_iter *from,
 | 
			
		||||
| 
						 | 
				
			
			@ -1263,6 +1323,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 | 
			
		|||
			zerocopy = true;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
 | 
			
		||||
		skb = tun_build_skb(tfile, from, len);
 | 
			
		||||
		if (IS_ERR(skb)) {
 | 
			
		||||
			this_cpu_inc(tun->pcpu_stats->rx_dropped);
 | 
			
		||||
			return PTR_ERR(skb);
 | 
			
		||||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
		if (!zerocopy) {
 | 
			
		||||
			copylen = len;
 | 
			
		||||
			if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
 | 
			
		||||
| 
						 | 
				
			
			@ -1288,6 +1355,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 | 
			
		|||
			kfree_skb(skb);
 | 
			
		||||
			return -EFAULT;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
 | 
			
		||||
		this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
 | 
			
		||||
| 
						 | 
				
			
			@ -2377,6 +2445,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 | 
			
		|||
	tfile->sk.sk_write_space = tun_sock_write_space;
 | 
			
		||||
	tfile->sk.sk_sndbuf = INT_MAX;
 | 
			
		||||
 | 
			
		||||
	tfile->alloc_frag.page = NULL;
 | 
			
		||||
 | 
			
		||||
	file->private_data = tfile;
 | 
			
		||||
	INIT_LIST_HEAD(&tfile->next);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue