mirror of
https://github.com/torvalds/linux.git
synced 2025-11-05 03:00:13 +02:00
tcp: fix sk_rcvbuf overshoot
Current autosizing in tcp_rcv_space_adjust() is too aggressive. Instead of betting on possible losses and overestimating the BDP, it is better to account only for slow start. The following patch then adds more precise tuning in the event of packet losses. Signed-off-by: Eric Dumazet <edumazet@google.com> Link: https://patch.msgid.link/20250513193919.1089692-3-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
c1269d3d12
commit
65c5287892
1 changed file with 25 additions and 34 deletions
|
|
@ -747,6 +747,29 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Grow the socket receive buffer (and let window_clamp follow) under
 * receive-buffer autotuning.  The window is doubled relative to the
 * last measured rcvq_space.space, i.e. sized only for the sender
 * doubling its rate as in slow start — NOTE(review): per the commit
 * message this deliberately drops the old loss-cushion overestimate;
 * loss handling is said to come in a follow-up patch.
 */
static void tcp_rcvbuf_grow(struct sock *sk)
{
	const struct net *net = sock_net(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int rcvwin, rcvbuf, cap;

	/* Respect manual tuning: bail out if autotuning is disabled via
	 * the tcp_moderate_rcvbuf sysctl, or the application pinned the
	 * buffer size (SOCK_RCVBUF_LOCK, i.e. SO_RCVBUF was set).
	 */
	if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) ||
	    (sk->sk_userlocks & SOCK_RCVBUF_LOCK))
		return;

	/* slow start: allow the sender to double its rate. */
	rcvwin = tp->rcvq_space.space << 1;

	/* Administrative upper bound: tcp_rmem[2] (max receive buffer). */
	cap = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);

	rcvbuf = min_t(u32, tcp_space_from_win(sk, rcvwin), cap);
	/* Grow only — never shrink sk_rcvbuf here. */
	if (rcvbuf > sk->sk_rcvbuf) {
		WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);

		/* Make the window clamp follow along. */
		WRITE_ONCE(tp->window_clamp,
			   tcp_win_from_space(sk, rcvbuf));
	}
}
|
||||||
/*
|
/*
|
||||||
* This function should be called every time data is copied to user space.
|
* This function should be called every time data is copied to user space.
|
||||||
* It calculates the appropriate TCP receive buffer space.
|
* It calculates the appropriate TCP receive buffer space.
|
||||||
|
|
@ -771,42 +794,10 @@ void tcp_rcv_space_adjust(struct sock *sk)
|
||||||
|
|
||||||
trace_tcp_rcvbuf_grow(sk, time);
|
trace_tcp_rcvbuf_grow(sk, time);
|
||||||
|
|
||||||
/* A bit of theory :
|
|
||||||
* copied = bytes received in previous RTT, our base window
|
|
||||||
* To cope with packet losses, we need a 2x factor
|
|
||||||
* To cope with slow start, and sender growing its cwin by 100 %
|
|
||||||
* every RTT, we need a 4x factor, because the ACK we are sending
|
|
||||||
* now is for the next RTT, not the current one :
|
|
||||||
* <prev RTT . ><current RTT .. ><next RTT .... >
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
|
|
||||||
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
|
|
||||||
u64 rcvwin, grow;
|
|
||||||
int rcvbuf;
|
|
||||||
|
|
||||||
/* minimal window to cope with packet losses, assuming
|
|
||||||
* steady state. Add some cushion because of small variations.
|
|
||||||
*/
|
|
||||||
rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
|
|
||||||
|
|
||||||
/* Accommodate for sender rate increase (eg. slow start) */
|
|
||||||
grow = rcvwin * (copied - tp->rcvq_space.space);
|
|
||||||
do_div(grow, tp->rcvq_space.space);
|
|
||||||
rcvwin += (grow << 1);
|
|
||||||
|
|
||||||
rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
|
|
||||||
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
|
|
||||||
if (rcvbuf > sk->sk_rcvbuf) {
|
|
||||||
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
|
|
||||||
|
|
||||||
/* Make the window clamp follow along. */
|
|
||||||
WRITE_ONCE(tp->window_clamp,
|
|
||||||
tcp_win_from_space(sk, rcvbuf));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tp->rcvq_space.space = copied;
|
tp->rcvq_space.space = copied;
|
||||||
|
|
||||||
|
tcp_rcvbuf_grow(sk);
|
||||||
|
|
||||||
new_measure:
|
new_measure:
|
||||||
tp->rcvq_space.seq = tp->copied_seq;
|
tp->rcvq_space.seq = tp->copied_seq;
|
||||||
tp->rcvq_space.time = tp->tcp_mstamp;
|
tp->rcvq_space.time = tp->tcp_mstamp;
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue