	vhost_net: basic polling support
This patch tries to poll for newly added tx buffers, or for data in the socket receive queue, for a while at the end of tx/rx processing. The maximum time spent on polling is specified through a new kind of vring ioctl.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
commit 0308813724
parent d4a60603fa

4 changed files with 94 additions and 5 deletions
drivers/vhost/net.c

@@ -287,6 +287,43 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
 	rcu_read_unlock_bh();
 }
 
+static inline unsigned long busy_clock(void)
+{
+	return local_clock() >> 10;
+}
+
+static bool vhost_can_busy_poll(struct vhost_dev *dev,
+				unsigned long endtime)
+{
+	return likely(!need_resched()) &&
+	       likely(!time_after(busy_clock(), endtime)) &&
+	       likely(!signal_pending(current)) &&
+	       !vhost_has_work(dev);
+}
+
+static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
+				    struct vhost_virtqueue *vq,
+				    struct iovec iov[], unsigned int iov_size,
+				    unsigned int *out_num, unsigned int *in_num)
+{
+	unsigned long uninitialized_var(endtime);
+	int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+				    out_num, in_num, NULL, NULL);
+
+	if (r == vq->num && vq->busyloop_timeout) {
+		preempt_disable();
+		endtime = busy_clock() + vq->busyloop_timeout;
+		while (vhost_can_busy_poll(vq->dev, endtime) &&
+		       vhost_vq_avail_empty(vq->dev, vq))
+			cpu_relax_lowlatency();
+		preempt_enable();
+		r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+					out_num, in_num, NULL, NULL);
+	}
+
+	return r;
+}
+
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
 static void handle_tx(struct vhost_net *net)

@@ -331,10 +368,9 @@ static void handle_tx(struct vhost_net *net)
 			      % UIO_MAXIOV == nvq->done_idx))
 			break;
 
-		head = vhost_get_vq_desc(vq, vq->iov,
-					 ARRAY_SIZE(vq->iov),
-					 &out, &in,
-					 NULL, NULL);
+		head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
+						ARRAY_SIZE(vq->iov),
+						&out, &in);
 		/* On error, stop handling until the next kick. */
 		if (unlikely(head < 0))
 			break;

@@ -435,6 +471,38 @@ static int peek_head_len(struct sock *sk)
 	return len;
 }
 
+static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
+{
+	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+	struct vhost_virtqueue *vq = &nvq->vq;
+	unsigned long uninitialized_var(endtime);
+	int len = peek_head_len(sk);
+
+	if (!len && vq->busyloop_timeout) {
+		/* Both tx vq and rx socket were polled here */
+		mutex_lock(&vq->mutex);
+		vhost_disable_notify(&net->dev, vq);
+
+		preempt_disable();
+		endtime = busy_clock() + vq->busyloop_timeout;
+
+		while (vhost_can_busy_poll(&net->dev, endtime) &&
+		       skb_queue_empty(&sk->sk_receive_queue) &&
+		       vhost_vq_avail_empty(&net->dev, vq))
+			cpu_relax_lowlatency();
+
+		preempt_enable();
+
+		if (vhost_enable_notify(&net->dev, vq))
+			vhost_poll_queue(&vq->poll);
+		mutex_unlock(&vq->mutex);
+
+		len = peek_head_len(sk);
+	}
+
+	return len;
+}
+
 /* This is a multi-buffer version of vhost_get_desc, that works if
  *	vq has read descriptors only.
  * @vq		- the relevant virtqueue

@@ -553,7 +621,7 @@ static void handle_rx(struct vhost_net *net)
 		vq->log : NULL;
 	mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
 
-	while ((sock_len = peek_head_len(sock->sk))) {
+	while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) {
 		sock_len += sock_hlen;
 		vhost_len = sock_len + vhost_hlen;
 		headcount = get_rx_bufs(vq, vq->heads, vhost_len,
drivers/vhost/vhost.c

@@ -303,6 +303,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->memory = NULL;
 	vhost_reset_is_le(vq);
 	vhost_disable_cross_endian(vq);
+	vq->busyloop_timeout = 0;
 }
 
 static int vhost_worker(void *data)

@@ -937,6 +938,19 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
 	case VHOST_GET_VRING_ENDIAN:
 		r = vhost_get_vring_endian(vq, idx, argp);
 		break;
+	case VHOST_SET_VRING_BUSYLOOP_TIMEOUT:
+		if (copy_from_user(&s, argp, sizeof(s))) {
+			r = -EFAULT;
+			break;
+		}
+		vq->busyloop_timeout = s.num;
+		break;
+	case VHOST_GET_VRING_BUSYLOOP_TIMEOUT:
+		s.index = idx;
+		s.num = vq->busyloop_timeout;
+		if (copy_to_user(argp, &s, sizeof(s)))
+			r = -EFAULT;
+		break;
 	default:
 		r = -ENOIOCTLCMD;
 	}
drivers/vhost/vhost.h

@@ -115,6 +115,7 @@ struct vhost_virtqueue {
 	/* Ring endianness requested by userspace for cross-endian support. */
 	bool user_be;
 #endif
+	u32 busyloop_timeout;
 };
 
 struct vhost_dev {
include/uapi/linux/vhost.h

@@ -126,6 +126,12 @@ struct vhost_memory {
 #define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
 /* Set eventfd to signal an error */
 #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
+/* Set busy loop timeout (in us) */
+#define VHOST_SET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x23,	\
+					 struct vhost_vring_state)
+/* Get busy loop timeout (in us) */
+#define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24,	\
+					 struct vhost_vring_state)
+
 /* VHOST_NET specific defines */
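For illustration only (not part of the commit): the new ioctls take a struct vhost_vring_state whose .index selects the vring and whose .num carries the timeout in microseconds, so a userspace backend could program them roughly as in the minimal sketch below. The /dev/vhost-net path and the 50 us value are assumptions for the example; error handling and the rest of the device/vring setup that a real backend performs are omitted.

/* Minimal sketch, assuming a freshly opened vhost-net device. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

int main(void)
{
	/* .index selects the vring, .num is the busy-poll timeout in us
	 * (the vring index and 50 us value are arbitrary for this example) */
	struct vhost_vring_state s = { .index = 0, .num = 50 };
	int fd = open("/dev/vhost-net", O_RDWR);

	if (fd < 0)
		return 1;
	if (ioctl(fd, VHOST_SET_OWNER, NULL))	/* claim the device first */
		return 1;

	if (ioctl(fd, VHOST_SET_VRING_BUSYLOOP_TIMEOUT, &s))
		perror("VHOST_SET_VRING_BUSYLOOP_TIMEOUT");

	s.num = 0;				/* read the value back */
	if (!ioctl(fd, VHOST_GET_VRING_BUSYLOOP_TIMEOUT, &s))
		printf("vring %u busyloop timeout: %u us\n", s.index, s.num);
	return 0;
}

The microsecond unit lines up with busy_clock() in the patch: local_clock() returns nanoseconds, and the >> 10 divides by 1024, i.e. approximately converts to microseconds, so busy_clock() can be compared directly against an endtime derived from busyloop_timeout.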