Mirror of https://github.com/torvalds/linux.git — synced 2025-11-04 02:30:34 +02:00

tcp: Convert do_tcp_sendpages() to use MSG_SPLICE_PAGES
Convert do_tcp_sendpages() to use sendmsg() with MSG_SPLICE_PAGES rather than directly splicing in the pages itself. do_tcp_sendpages() can then be inlined in subsequent patches into its callers. This allows ->sendpage() to be replaced by something that can handle multiple multipage folios in a single transaction.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: David Ahern <dsahern@kernel.org>
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
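For context, the core of the conversion is the MSG_SPLICE_PAGES sendmsg pattern: wrap the page in a bio_vec, point the msghdr's iterator at it, and let the ordinary sendmsg path splice the page into the socket rather than copying it. The sketch below (not part of the patch; example_splice_one_page() is an illustrative name) shows that pattern on the assumption that the caller already holds the socket lock, as callers of do_tcp_sendpages() do.

/* Hedged sketch of the MSG_SPLICE_PAGES send pattern this patch adopts. */
#include <linux/bvec.h>
#include <linux/socket.h>
#include <linux/uio.h>
#include <net/tcp.h>

static ssize_t example_splice_one_page(struct sock *sk, struct page *page,
				       int offset, size_t len)
{
	struct bio_vec bvec;
	struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES, };

	/* Describe the page fragment and hang it off the message iterator. */
	bvec_set_page(&bvec, page, len, offset);
	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);

	/* Assumes the socket lock is held, as for do_tcp_sendpages(). */
	return tcp_sendmsg_locked(sk, &msg, len);
}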
This commit is contained in:

parent 270a1c3de4
commit c5c37af6ec
1 changed file with 7 additions and 151 deletions:

 net/ipv4/tcp.c | 158
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -974,163 +974,19 @@ static int tcp_wmem_schedule(struct sock *sk, int copy)
 	return min(copy, sk->sk_forward_alloc);
 }
 
-static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
-				      struct page *page, int offset, size_t *size)
-{
-	struct sk_buff *skb = tcp_write_queue_tail(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-	bool can_coalesce;
-	int copy, i;
-
-	if (!skb || (copy = size_goal - skb->len) <= 0 ||
-	    !tcp_skb_can_collapse_to(skb)) {
-new_segment:
-		if (!sk_stream_memory_free(sk))
-			return NULL;
-
-		skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation,
-					   tcp_rtx_and_write_queues_empty(sk));
-		if (!skb)
-			return NULL;
-
-#ifdef CONFIG_TLS_DEVICE
-		skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
-#endif
-		tcp_skb_entail(sk, skb);
-		copy = size_goal;
-	}
-
-	if (copy > *size)
-		copy = *size;
-
-	i = skb_shinfo(skb)->nr_frags;
-	can_coalesce = skb_can_coalesce(skb, i, page, offset);
-	if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
-		tcp_mark_push(tp, skb);
-		goto new_segment;
-	}
-	if (tcp_downgrade_zcopy_pure(sk, skb))
-		return NULL;
-
-	copy = tcp_wmem_schedule(sk, copy);
-	if (!copy)
-		return NULL;
-
-	if (can_coalesce) {
-		skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
-	} else {
-		get_page(page);
-		skb_fill_page_desc_noacc(skb, i, page, offset, copy);
-	}
-
-	if (!(flags & MSG_NO_SHARED_FRAGS))
-		skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
-
-	skb->len += copy;
-	skb->data_len += copy;
-	skb->truesize += copy;
-	sk_wmem_queued_add(sk, copy);
-	sk_mem_charge(sk, copy);
-	WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
-	TCP_SKB_CB(skb)->end_seq += copy;
-	tcp_skb_pcount_set(skb, 0);
-
-	*size = copy;
-	return skb;
-}
-
 ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 			 size_t size, int flags)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	int mss_now, size_goal;
-	int err;
-	ssize_t copied;
-	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+	struct bio_vec bvec;
+	struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, };
 
-	if (IS_ENABLED(CONFIG_DEBUG_VM) &&
-	    WARN_ONCE(!sendpage_ok(page),
-		      "page must not be a Slab one and have page_count > 0"))
-		return -EINVAL;
+	bvec_set_page(&bvec, page, size, offset);
+	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
 
-	/* Wait for a connection to finish. One exception is TCP Fast Open
-	 * (passive side) where data is allowed to be sent before a connection
-	 * is fully established.
-	 */
-	if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
-	    !tcp_passive_fastopen(sk)) {
-		err = sk_stream_wait_connect(sk, &timeo);
-		if (err != 0)
-			goto out_err;
-	}
+	if (flags & MSG_SENDPAGE_NOTLAST)
+		msg.msg_flags |= MSG_MORE;
 
-	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
-	mss_now = tcp_send_mss(sk, &size_goal, flags);
-	copied = 0;
-
-	err = -EPIPE;
-	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
-		goto out_err;
-
-	while (size > 0) {
-		struct sk_buff *skb;
-		size_t copy = size;
-
-		skb = tcp_build_frag(sk, size_goal, flags, page, offset, &copy);
-		if (!skb)
-			goto wait_for_space;
-
-		if (!copied)
-			TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
-
-		copied += copy;
-		offset += copy;
-		size -= copy;
-		if (!size)
-			goto out;
-
-		if (skb->len < size_goal || (flags & MSG_OOB))
-			continue;
-
-		if (forced_push(tp)) {
-			tcp_mark_push(tp, skb);
-			__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
-		} else if (skb == tcp_send_head(sk))
-			tcp_push_one(sk, mss_now);
-		continue;
-
-wait_for_space:
-		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-		tcp_push(sk, flags & ~MSG_MORE, mss_now,
-			 TCP_NAGLE_PUSH, size_goal);
-
-		err = sk_stream_wait_memory(sk, &timeo);
-		if (err != 0)
-			goto do_error;
-
-		mss_now = tcp_send_mss(sk, &size_goal, flags);
-	}
-
-out:
-	if (copied) {
-		tcp_tx_timestamp(sk, sk->sk_tsflags);
-		if (!(flags & MSG_SENDPAGE_NOTLAST))
-			tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
-	}
-	return copied;
-
-do_error:
-	tcp_remove_empty_skb(sk);
-	if (copied)
-		goto out;
-out_err:
-	/* make sure we wake any epoll edge trigger waiter */
-	if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
-		sk->sk_write_space(sk);
-		tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
-	}
-	return sk_stream_error(sk, flags, err);
+	return tcp_sendmsg_locked(sk, &msg, size);
 }
 EXPORT_SYMBOL_GPL(do_tcp_sendpages);
 
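A note on the MSG_SENDPAGE_NOTLAST handling above: callers that push a run of pages set that flag on every page except the last, and the converted function simply translates it to MSG_MORE before handing off to tcp_sendmsg_locked(). A hedged sketch of such a caller follows, assuming the socket lock is already held as existing users of do_tcp_sendpages() do; example_send_pages() and its parameters are hypothetical.

/* Hedged sketch: drive the converted path for a run of pages. */
static ssize_t example_send_pages(struct sock *sk, struct page **pages,
				  unsigned int nr_pages, size_t page_len)
{
	ssize_t sent = 0;
	unsigned int i;

	for (i = 0; i < nr_pages; i++) {
		/* All but the final page: tell TCP more data follows. */
		int flags = (i + 1 < nr_pages) ? MSG_SENDPAGE_NOTLAST : 0;
		ssize_t ret = do_tcp_sendpages(sk, pages[i], 0, page_len, flags);

		if (ret < 0)
			return sent ? sent : ret;
		sent += ret;
	}
	return sent;
}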