mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	SUNRPC: Handle TCP socket sends with kernel_sendpage() again
Daire Byrne reports a ~50% aggregrate throughput regression on his Linux NFS server after commitda1661b93b("SUNRPC: Teach server to use xprt_sock_sendmsg for socket sends"), which replaced kernel_send_page() calls in NFSD's socket send path with calls to sock_sendmsg() using iov_iter. Investigation showed that tcp_sendmsg() was not using zero-copy to send the xdr_buf's bvec pages, but instead was relying on memcpy. This means copying every byte of a large NFS READ payload. It looks like TLS sockets do indeed support a ->sendpage method, so it's really not necessary to use xprt_sock_sendmsg() to support TLS fully on the server. A mechanical reversion ofda1661b93bis not possible at this point, but we can re-implement the server's TCP socket sendmsg path using kernel_sendpage(). Reported-by: Daire Byrne <daire@dneg.com> BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=209439 Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
This commit is contained in:
		
							parent
							
								
									d6c9e4368c
								
							
						
					
					
						commit
						4a85a6a332
					
				
					 1 changed files with 85 additions and 1 deletions
				
			
		| 
						 | 
					@ -1062,6 +1062,90 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 | 
				
			||||||
	return 0;	/* record not complete */
 | 
						return 0;	/* record not complete */
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec,
 | 
				
			||||||
 | 
								      int flags)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return kernel_sendpage(sock, virt_to_page(vec->iov_base),
 | 
				
			||||||
 | 
								       offset_in_page(vec->iov_base),
 | 
				
			||||||
 | 
								       vec->iov_len, flags);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * kernel_sendpage() is used exclusively to reduce the number of
 | 
				
			||||||
 | 
					 * copy operations in this path. Therefore the caller must ensure
 | 
				
			||||||
 | 
					 * that the pages backing @xdr are unchanging.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * In addition, the logic assumes that * .bv_len is never larger
 | 
				
			||||||
 | 
					 * than PAGE_SIZE.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg,
 | 
				
			||||||
 | 
								   struct xdr_buf *xdr, rpc_fraghdr marker,
 | 
				
			||||||
 | 
								   unsigned int *sentp)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						const struct kvec *head = xdr->head;
 | 
				
			||||||
 | 
						const struct kvec *tail = xdr->tail;
 | 
				
			||||||
 | 
						struct kvec rm = {
 | 
				
			||||||
 | 
							.iov_base	= &marker,
 | 
				
			||||||
 | 
							.iov_len	= sizeof(marker),
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
						int flags, ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						*sentp = 0;
 | 
				
			||||||
 | 
						xdr_alloc_bvec(xdr, GFP_KERNEL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						msg->msg_flags = MSG_MORE;
 | 
				
			||||||
 | 
						ret = kernel_sendmsg(sock, msg, &rm, 1, rm.iov_len);
 | 
				
			||||||
 | 
						if (ret < 0)
 | 
				
			||||||
 | 
							return ret;
 | 
				
			||||||
 | 
						*sentp += ret;
 | 
				
			||||||
 | 
						if (ret != rm.iov_len)
 | 
				
			||||||
 | 
							return -EAGAIN;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						flags = head->iov_len < xdr->len ? MSG_MORE | MSG_SENDPAGE_NOTLAST : 0;
 | 
				
			||||||
 | 
						ret = svc_tcp_send_kvec(sock, head, flags);
 | 
				
			||||||
 | 
						if (ret < 0)
 | 
				
			||||||
 | 
							return ret;
 | 
				
			||||||
 | 
						*sentp += ret;
 | 
				
			||||||
 | 
						if (ret != head->iov_len)
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (xdr->page_len) {
 | 
				
			||||||
 | 
							unsigned int offset, len, remaining;
 | 
				
			||||||
 | 
							struct bio_vec *bvec;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							bvec = xdr->bvec;
 | 
				
			||||||
 | 
							offset = xdr->page_base;
 | 
				
			||||||
 | 
							remaining = xdr->page_len;
 | 
				
			||||||
 | 
							flags = MSG_MORE | MSG_SENDPAGE_NOTLAST;
 | 
				
			||||||
 | 
							while (remaining > 0) {
 | 
				
			||||||
 | 
								if (remaining <= PAGE_SIZE && tail->iov_len == 0)
 | 
				
			||||||
 | 
									flags = 0;
 | 
				
			||||||
 | 
								len = min(remaining, bvec->bv_len);
 | 
				
			||||||
 | 
								ret = kernel_sendpage(sock, bvec->bv_page,
 | 
				
			||||||
 | 
										      bvec->bv_offset + offset,
 | 
				
			||||||
 | 
										      len, flags);
 | 
				
			||||||
 | 
								if (ret < 0)
 | 
				
			||||||
 | 
									return ret;
 | 
				
			||||||
 | 
								*sentp += ret;
 | 
				
			||||||
 | 
								if (ret != len)
 | 
				
			||||||
 | 
									goto out;
 | 
				
			||||||
 | 
								remaining -= len;
 | 
				
			||||||
 | 
								offset = 0;
 | 
				
			||||||
 | 
								bvec++;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (tail->iov_len) {
 | 
				
			||||||
 | 
							ret = svc_tcp_send_kvec(sock, tail, 0);
 | 
				
			||||||
 | 
							if (ret < 0)
 | 
				
			||||||
 | 
								return ret;
 | 
				
			||||||
 | 
							*sentp += ret;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * svc_tcp_sendto - Send out a reply on a TCP socket
 | 
					 * svc_tcp_sendto - Send out a reply on a TCP socket
 | 
				
			||||||
 * @rqstp: completed svc_rqst
 | 
					 * @rqstp: completed svc_rqst
 | 
				
			||||||
| 
						 | 
					@ -1089,7 +1173,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
 | 
				
			||||||
	mutex_lock(&xprt->xpt_mutex);
 | 
						mutex_lock(&xprt->xpt_mutex);
 | 
				
			||||||
	if (svc_xprt_is_dead(xprt))
 | 
						if (svc_xprt_is_dead(xprt))
 | 
				
			||||||
		goto out_notconn;
 | 
							goto out_notconn;
 | 
				
			||||||
	err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent);
 | 
						err = svc_tcp_sendmsg(svsk->sk_sock, &msg, xdr, marker, &sent);
 | 
				
			||||||
	xdr_free_bvec(xdr);
 | 
						xdr_free_bvec(xdr);
 | 
				
			||||||
	trace_svcsock_tcp_send(xprt, err < 0 ? err : sent);
 | 
						trace_svcsock_tcp_send(xprt, err < 0 ? err : sent);
 | 
				
			||||||
	if (err < 0 || sent != (xdr->len + sizeof(marker)))
 | 
						if (err < 0 || sent != (xdr->len + sizeof(marker)))
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue