mirror of
https://github.com/torvalds/linux.git
synced 2025-11-02 17:49:03 +02:00
Merge branch 'tcp-receiver-changes'
Eric Dumazet says: ==================== tcp: receiver changes Before accepting an incoming packet: - Make sure to not accept a packet beyond advertized RWIN. If not, increment a new SNMP counter (LINUX_MIB_BEYOND_WINDOW) - ooo packets should update rcv_mss and tp->scaling_ratio. - Make sure to not accept packet beyond sk_rcvbuf limit. This series includes three associated packetdrill tests. ==================== Link: https://patch.msgid.link/20250711114006.480026-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
06baf9bfa6
9 changed files with 152 additions and 14 deletions
|
|
@ -36,6 +36,7 @@ unsigned_long LINUX_MIB_TIMEWAITRECYCLED
|
|||
unsigned_long LINUX_MIB_TIMEWAITKILLED
|
||||
unsigned_long LINUX_MIB_PAWSACTIVEREJECTED
|
||||
unsigned_long LINUX_MIB_PAWSESTABREJECTED
|
||||
unsigned_long LINUX_MIB_BEYOND_WINDOW
|
||||
unsigned_long LINUX_MIB_TSECRREJECTED
|
||||
unsigned_long LINUX_MIB_PAWS_OLD_ACK
|
||||
unsigned_long LINUX_MIB_PAWS_TW_REJECTED
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@
|
|||
FN(TCP_LISTEN_OVERFLOW) \
|
||||
FN(TCP_OLD_SEQUENCE) \
|
||||
FN(TCP_INVALID_SEQUENCE) \
|
||||
FN(TCP_INVALID_END_SEQUENCE) \
|
||||
FN(TCP_INVALID_ACK_SEQUENCE) \
|
||||
FN(TCP_RESET) \
|
||||
FN(TCP_INVALID_SYN) \
|
||||
|
|
@ -303,8 +304,14 @@ enum skb_drop_reason {
|
|||
SKB_DROP_REASON_TCP_LISTEN_OVERFLOW,
|
||||
/** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */
|
||||
SKB_DROP_REASON_TCP_OLD_SEQUENCE,
|
||||
/** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
|
||||
/** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field. */
|
||||
SKB_DROP_REASON_TCP_INVALID_SEQUENCE,
|
||||
/**
|
||||
* @SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE:
|
||||
* Not acceptable END_SEQ field.
|
||||
* Corresponds to LINUX_MIB_BEYOND_WINDOW.
|
||||
*/
|
||||
SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE,
|
||||
/**
|
||||
* @SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE: Not acceptable ACK SEQ
|
||||
* field because ack sequence is not in the window between snd_una
|
||||
|
|
|
|||
|
|
@ -1553,7 +1553,7 @@ __sk_rmem_schedule(struct sock *sk, int size, bool pfmemalloc)
|
|||
}
|
||||
|
||||
static inline bool
|
||||
sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
|
||||
sk_rmem_schedule(struct sock *sk, const struct sk_buff *skb, int size)
|
||||
{
|
||||
return __sk_rmem_schedule(sk, size, skb_pfmemalloc(skb));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -186,6 +186,7 @@ enum
|
|||
LINUX_MIB_TIMEWAITKILLED, /* TimeWaitKilled */
|
||||
LINUX_MIB_PAWSACTIVEREJECTED, /* PAWSActiveRejected */
|
||||
LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */
|
||||
LINUX_MIB_BEYOND_WINDOW, /* BeyondWindow */
|
||||
LINUX_MIB_TSECRREJECTED, /* TSEcrRejected */
|
||||
LINUX_MIB_PAWS_OLD_ACK, /* PAWSOldAck */
|
||||
LINUX_MIB_PAWS_TW_REJECTED, /* PAWSTimewait */
|
||||
|
|
|
|||
|
|
@ -189,6 +189,7 @@ static const struct snmp_mib snmp4_net_list[] = {
|
|||
SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED),
|
||||
SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED),
|
||||
SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
|
||||
SNMP_MIB_ITEM("BeyondWindow", LINUX_MIB_BEYOND_WINDOW),
|
||||
SNMP_MIB_ITEM("TSEcrRejected", LINUX_MIB_TSECRREJECTED),
|
||||
SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK),
|
||||
SNMP_MIB_ITEM("PAWSTimewait", LINUX_MIB_PAWS_TW_REJECTED),
|
||||
|
|
|
|||
|
|
@ -4391,14 +4391,22 @@ static enum skb_drop_reason tcp_disordered_ack_check(const struct sock *sk,
|
|||
* (borrowed from freebsd)
|
||||
*/
|
||||
|
||||
static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp,
|
||||
static enum skb_drop_reason tcp_sequence(const struct sock *sk,
|
||||
u32 seq, u32 end_seq)
|
||||
{
|
||||
const struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
if (before(end_seq, tp->rcv_wup))
|
||||
return SKB_DROP_REASON_TCP_OLD_SEQUENCE;
|
||||
|
||||
if (after(seq, tp->rcv_nxt + tcp_receive_window(tp)))
|
||||
return SKB_DROP_REASON_TCP_INVALID_SEQUENCE;
|
||||
if (after(end_seq, tp->rcv_nxt + tcp_receive_window(tp))) {
|
||||
if (after(seq, tp->rcv_nxt + tcp_receive_window(tp)))
|
||||
return SKB_DROP_REASON_TCP_INVALID_SEQUENCE;
|
||||
|
||||
/* Only accept this packet if receive queue is empty. */
|
||||
if (skb_queue_len(&sk->sk_receive_queue))
|
||||
return SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE;
|
||||
}
|
||||
|
||||
return SKB_NOT_DROPPED_YET;
|
||||
}
|
||||
|
|
@ -4880,10 +4888,20 @@ static void tcp_ofo_queue(struct sock *sk)
|
|||
static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb);
|
||||
static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb);
|
||||
|
||||
static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
|
||||
/* Check if this incoming skb can be added to socket receive queues
|
||||
* while satisfying sk->sk_rcvbuf limit.
|
||||
* The test adds skb->truesize to the current sk->sk_rmem_alloc value,
* so it answers "would the socket still be within sk_rcvbuf once this
* skb is charged", not merely "is the socket within sk_rcvbuf now".
* Returns true when that projected total is <= sk->sk_rcvbuf.
*/
|
||||
static bool tcp_can_ingest(const struct sock *sk, const struct sk_buff *skb)
|
||||
{
|
||||
/* Projected receive memory if skb were charged to the socket. */
unsigned int new_mem = atomic_read(&sk->sk_rmem_alloc) + skb->truesize;
|
||||
|
||||
return new_mem <= sk->sk_rcvbuf;
|
||||
}
|
||||
|
||||
static int tcp_try_rmem_schedule(struct sock *sk, const struct sk_buff *skb,
|
||||
unsigned int size)
|
||||
{
|
||||
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
|
||||
if (!tcp_can_ingest(sk, skb) ||
|
||||
!sk_rmem_schedule(sk, skb, size)) {
|
||||
|
||||
if (tcp_prune_queue(sk, skb) < 0)
|
||||
|
|
@ -4915,6 +4933,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
|
|||
return;
|
||||
}
|
||||
|
||||
tcp_measure_rcv_mss(sk, skb);
|
||||
/* Disable header prediction. */
|
||||
tp->pred_flags = 0;
|
||||
inet_csk_schedule_ack(sk);
|
||||
|
|
@ -5498,7 +5517,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb)
|
|||
tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
|
||||
tp->ooo_last_skb = rb_to_skb(prev);
|
||||
if (!prev || goal <= 0) {
|
||||
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
|
||||
/* Test the incoming packet (in_skb): skb has just been handed to
 * tcp_drop_reason() above and must not be dereferenced any more.
 */
if (tcp_can_ingest(sk, in_skb) &&
|
||||
!tcp_under_memory_pressure(sk))
|
||||
break;
|
||||
goal = sk->sk_rcvbuf >> 3;
|
||||
|
|
@ -5532,12 +5551,12 @@ static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb)
|
|||
|
||||
NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
|
||||
|
||||
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
|
||||
if (!tcp_can_ingest(sk, in_skb))
|
||||
tcp_clamp_window(sk);
|
||||
else if (tcp_under_memory_pressure(sk))
|
||||
tcp_adjust_rcv_ssthresh(sk);
|
||||
|
||||
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
|
||||
if (tcp_can_ingest(sk, in_skb))
|
||||
return 0;
|
||||
|
||||
tcp_collapse_ofo_queue(sk);
|
||||
|
|
@ -5547,7 +5566,7 @@ static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb)
|
|||
NULL,
|
||||
tp->copied_seq, tp->rcv_nxt);
|
||||
|
||||
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
|
||||
if (tcp_can_ingest(sk, in_skb))
|
||||
return 0;
|
||||
|
||||
/* Collapsing did not help, destructive actions follow.
|
||||
|
|
@ -5555,7 +5574,7 @@ static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb)
|
|||
|
||||
tcp_prune_ofo_queue(sk, in_skb);
|
||||
|
||||
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
|
||||
if (tcp_can_ingest(sk, in_skb))
|
||||
return 0;
|
||||
|
||||
/* If we are really being abused, tell the caller to silently
|
||||
|
|
@ -5881,7 +5900,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
|
|||
|
||||
step1:
|
||||
/* Step 1: check sequence number */
|
||||
reason = tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
|
||||
reason = tcp_sequence(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
|
||||
if (reason) {
|
||||
/* RFC793, page 37: "In all states except SYN-SENT, all reset
|
||||
* (RST) segments are validated by checking their SEQ-fields."
|
||||
|
|
@ -5892,6 +5911,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
|
|||
if (!th->rst) {
|
||||
if (th->syn)
|
||||
goto syn_challenge;
|
||||
NET_INC_STATS(sock_net(sk), LINUX_MIB_BEYOND_WINDOW);
|
||||
if (!tcp_oow_rate_limited(sock_net(sk), skb,
|
||||
LINUX_MIB_TCPACKSKIPPEDSEQ,
|
||||
&tp->last_oow_ack_time))
|
||||
|
|
@ -6110,6 +6130,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
|
|||
if (tcp_checksum_complete(skb))
|
||||
goto csum_error;
|
||||
|
||||
if (after(TCP_SKB_CB(skb)->end_seq,
|
||||
tp->rcv_nxt + tcp_receive_window(tp)))
|
||||
goto validate;
|
||||
|
||||
if ((int)skb->truesize > sk->sk_forward_alloc)
|
||||
goto step5;
|
||||
|
||||
|
|
@ -6165,7 +6189,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
|
|||
/*
|
||||
* Standard slow path.
|
||||
*/
|
||||
|
||||
validate:
|
||||
if (!tcp_validate_incoming(sk, skb, th, 1))
|
||||
return;
|
||||
|
||||
|
|
|
|||
27
tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
Normal file
27
tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
--mss=1000
|
||||
|
||||
`./defaults.sh
|
||||
sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"`
|
||||
|
||||
+0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
|
||||
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
|
||||
+0 bind(3, ..., ...) = 0
|
||||
+0 listen(3, 1) = 0
|
||||
|
||||
+0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
|
||||
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
|
||||
+.1 < . 1:1(0) ack 1 win 257
|
||||
|
||||
+0 accept(3, ..., ...) = 4
|
||||
|
||||
+0 < . 2001:11001(9000) ack 1 win 257
|
||||
+0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:11001>
|
||||
|
||||
// check that ooo packet properly updates tcpi_rcv_mss
|
||||
+0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }%
|
||||
|
||||
+0 < . 11001:21001(10000) ack 1 win 257
|
||||
+0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:21001>
|
||||
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
--mss=1000
|
||||
|
||||
`./defaults.sh`
|
||||
|
||||
0 `nstat -n`
|
||||
|
||||
// Establish a connection.
|
||||
+0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
|
||||
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
|
||||
+0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0
|
||||
+0 bind(3, ..., ...) = 0
|
||||
+0 listen(3, 1) = 0
|
||||
|
||||
+0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
|
||||
+0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0>
|
||||
+.1 < . 1:1(0) ack 1 win 257
|
||||
|
||||
+0 accept(3, ..., ...) = 4
|
||||
|
||||
+0 < P. 1:4001(4000) ack 1 win 257
|
||||
+0 > . 1:1(0) ack 4001 win 5000
|
||||
|
||||
// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
|
||||
+0 < P. 4001:54001(50000) ack 1 win 257
|
||||
+0 > . 1:1(0) ack 4001 win 5000
|
||||
|
||||
// ooo packet. : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
|
||||
+1 < P. 5001:55001(50000) ack 1 win 257
|
||||
+0 > . 1:1(0) ack 4001 win 5000
|
||||
|
||||
// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
|
||||
+0 < P. 70001:80001(10000) ack 1 win 257
|
||||
+0 > . 1:1(0) ack 4001 win 5000
|
||||
|
||||
+0 read(4, ..., 100000) = 4000
|
||||
|
||||
// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd
|
||||
+0 < P. 4001:54001(50000) ack 1 win 257
|
||||
+.040 > . 1:1(0) ack 54001 win 0
|
||||
|
||||
// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times.
|
||||
+0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "`
|
||||
33
tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
Normal file
33
tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
--mss=1000
|
||||
|
||||
`./defaults.sh`
|
||||
|
||||
0 `nstat -n`
|
||||
|
||||
// Establish a connection.
|
||||
+0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
|
||||
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
|
||||
+0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0
|
||||
+0 bind(3, ..., ...) = 0
|
||||
+0 listen(3, 1) = 0
|
||||
|
||||
+0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
|
||||
+0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0>
|
||||
+.1 < . 1:1(0) ack 1 win 257
|
||||
|
||||
+0 accept(3, ..., ...) = 4
|
||||
|
||||
+0 < P. 1:20001(20000) ack 1 win 257
|
||||
+.04 > . 1:1(0) ack 20001 win 18000
|
||||
|
||||
+0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0
|
||||
+0 < P. 20001:80001(60000) ack 1 win 257
|
||||
+0 > . 1:1(0) ack 20001 win 18000
|
||||
|
||||
+0 read(4, ..., 20000) = 20000
|
||||
// A too big packet is accepted if the receive queue is empty
|
||||
+0 < P. 20001:80001(60000) ack 1 win 257
|
||||
+0 > . 1:1(0) ack 80001 win 0
|
||||
|
||||
Loading…
Reference in a new issue