forked from mirrors/linux
		
	tcp: reorganize tcp_sock fast path variables
The variables are organized according in the following way: - TX read-mostly hotpath cache lines - TXRX read-mostly hotpath cache lines - RX read-mostly hotpath cache lines - TX read-write hotpath cache line - TXRX read-write hotpath cache line - RX read-write hotpath cache line Fastpath cachelines end after rcvq_space. Cache line boundaries are enforced only between read-mostly and read-write. That is, if read-mostly tx cachelines bleed into read-mostly txrx cachelines, we do not care. We care about the boundaries between read and write cachelines because we want to prevent false sharing. Fast path variables span cache lines before change: 12 Fast path variables span cache lines after change: 8 Suggested-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Wei Wang <weiwan@google.com> Signed-off-by: Coco Li <lixiaoyan@google.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Reviewed-by: David Ahern <dsahern@kernel.org> Link: https://lore.kernel.org/r/20231204201232.520025-3-lixiaoyan@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
		
							parent
							
								
									43a71cd66b
								
							
						
					
					
						commit
						d5fed5addb
					
				
					 2 changed files with 234 additions and 121 deletions
				
			
		|  | @ -194,36 +194,62 @@ static inline bool tcp_rsk_used_ao(const struct request_sock *req) | ||||||
| #define TCP_RMEM_TO_WIN_SCALE 8 | #define TCP_RMEM_TO_WIN_SCALE 8 | ||||||
| 
 | 
 | ||||||
| struct tcp_sock { | struct tcp_sock { | ||||||
|  | 	/* Cacheline organization can be found documented in
 | ||||||
|  | 	 * Documentation/networking/net_cachelines/tcp_sock.rst. | ||||||
|  | 	 * Please update the document when adding new fields. | ||||||
|  | 	 */ | ||||||
|  | 
 | ||||||
| 	/* inet_connection_sock has to be the first member of tcp_sock */ | 	/* inet_connection_sock has to be the first member of tcp_sock */ | ||||||
| 	struct inet_connection_sock	inet_conn; | 	struct inet_connection_sock	inet_conn; | ||||||
| 	u16	tcp_header_len;	/* Bytes of tcp header to send		*/ | 
 | ||||||
|  | 	/* TX read-mostly hotpath cache lines */ | ||||||
|  | 	__cacheline_group_begin(tcp_sock_read_tx); | ||||||
|  | 	/* timestamp of last sent data packet (for restart window) */ | ||||||
|  | 	u32	max_window;	/* Maximal window ever seen from peer	*/ | ||||||
|  | 	u32	rcv_ssthresh;	/* Current window clamp			*/ | ||||||
|  | 	u32	reordering;	/* Packet reordering metric.		*/ | ||||||
|  | 	u32	notsent_lowat;	/* TCP_NOTSENT_LOWAT */ | ||||||
| 	u16	gso_segs;	/* Max number of segs per GSO packet	*/ | 	u16	gso_segs;	/* Max number of segs per GSO packet	*/ | ||||||
|  | 	/* from STCP, retrans queue hinting */ | ||||||
|  | 	struct sk_buff *lost_skb_hint; | ||||||
|  | 	struct sk_buff *retransmit_skb_hint; | ||||||
|  | 	__cacheline_group_end(tcp_sock_read_tx); | ||||||
| 
 | 
 | ||||||
| /*
 | 	/* TXRX read-mostly hotpath cache lines */ | ||||||
|  *	Header prediction flags | 	__cacheline_group_begin(tcp_sock_read_txrx); | ||||||
|  *	0x5?10 << 16 + snd_wnd in net byte order | 	u32	tsoffset;	/* timestamp offset */ | ||||||
|  */ | 	u32	snd_wnd;	/* The window we expect to receive	*/ | ||||||
| 	__be32	pred_flags; | 	u32	mss_cache;	/* Cached effective mss, not including SACKS */ | ||||||
|  | 	u32	snd_cwnd;	/* Sending congestion window		*/ | ||||||
|  | 	u32	prr_out;	/* Total number of pkts sent during Recovery. */ | ||||||
|  | 	u32	lost_out;	/* Lost packets			*/ | ||||||
|  | 	u32	sacked_out;	/* SACK'd packets			*/ | ||||||
|  | 	u16	tcp_header_len;	/* Bytes of tcp header to send		*/ | ||||||
|  | 	u8	chrono_type : 2,	/* current chronograph type */ | ||||||
|  | 		repair      : 1, | ||||||
|  | 		is_sack_reneg:1,    /* in recovery from loss with SACK reneg? */ | ||||||
|  | 		is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ | ||||||
|  | 	__cacheline_group_end(tcp_sock_read_txrx); | ||||||
| 
 | 
 | ||||||
| /*
 | 	/* RX read-mostly hotpath cache lines */ | ||||||
|  *	RFC793 variables by their proper names. This means you can | 	__cacheline_group_begin(tcp_sock_read_rx); | ||||||
|  *	read the code and the spec side by side (and laugh ...) | 	u32	copied_seq;	/* Head of yet unread data */ | ||||||
|  *	See RFC793 and RFC1122. The RFC writes these in capitals. | 	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */ | ||||||
|  */ | 	u32	snd_wl1;	/* Sequence for window update		*/ | ||||||
| 	u64	bytes_received;	/* RFC4898 tcpEStatsAppHCThruOctetsReceived
 | 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP */ | ||||||
| 				 * sum(delta(rcv_nxt)), or how many bytes | 	u32	rttvar_us;	/* smoothed mdev_max			*/ | ||||||
| 				 * were acked. | 	u32	retrans_out;	/* Retransmitted packets out		*/ | ||||||
| 				 */ | 	u16	advmss;		/* Advertised MSS			*/ | ||||||
| 	u32	segs_in;	/* RFC4898 tcpEStatsPerfSegsIn
 | 	u16	urg_data;	/* Saved octet of OOB data and control flags */ | ||||||
| 				 * total number of segments in. | 	u32	lost;		/* Total data packets lost incl. rexmits */ | ||||||
| 				 */ | 	struct  minmax rtt_min; | ||||||
| 	u32	data_segs_in;	/* RFC4898 tcpEStatsPerfDataSegsIn
 | 	/* OOO segments go in this rbtree. Socket lock must be held. */ | ||||||
| 				 * total number of data segments in. | 	struct rb_root	out_of_order_queue; | ||||||
| 				 */ | 	u32	snd_ssthresh;	/* Slow start size threshold		*/ | ||||||
|  	u32	rcv_nxt;	/* What we want to receive next 	*/ | 	__cacheline_group_end(tcp_sock_read_rx); | ||||||
| 	u32	copied_seq;	/* Head of yet unread data		*/ | 
 | ||||||
| 	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/ | 	/* TX read-write hotpath cache lines */ | ||||||
|  	u32	snd_nxt;	/* Next sequence we send		*/ | 	__cacheline_group_begin(tcp_sock_write_tx) ____cacheline_aligned; | ||||||
| 	u32	segs_out;	/* RFC4898 tcpEStatsPerfSegsOut
 | 	u32	segs_out;	/* RFC4898 tcpEStatsPerfSegsOut
 | ||||||
| 				 * The total number of segments sent. | 				 * The total number of segments sent. | ||||||
| 				 */ | 				 */ | ||||||
|  | @ -233,32 +259,103 @@ struct tcp_sock { | ||||||
| 	u64	bytes_sent;	/* RFC4898 tcpEStatsPerfHCDataOctetsOut
 | 	u64	bytes_sent;	/* RFC4898 tcpEStatsPerfHCDataOctetsOut
 | ||||||
| 				 * total number of data bytes sent. | 				 * total number of data bytes sent. | ||||||
| 				 */ | 				 */ | ||||||
|  | 	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */ | ||||||
|  | 	u32	chrono_start;	/* Start time in jiffies of a TCP chrono */ | ||||||
|  | 	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */ | ||||||
|  | 	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */ | ||||||
|  | 	u32	pushed_seq;	/* Last pushed seq, required to talk to windows */ | ||||||
|  | 	u32	lsndtime; | ||||||
|  | 	u32	mdev_us;	/* medium deviation			*/ | ||||||
|  | 	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */ | ||||||
|  | 	u64	tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ | ||||||
|  | 	u64	tcp_mstamp;	/* most recent packet received/sent */ | ||||||
|  | 	u32	rtt_seq;	/* sequence number to update rttvar	*/ | ||||||
|  | 	struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ | ||||||
|  | 	struct sk_buff *highest_sack;   /* skb just after the highest
 | ||||||
|  | 					 * skb with SACKed bit set | ||||||
|  | 					 * (validity guaranteed only if | ||||||
|  | 					 * sacked_out > 0) | ||||||
|  | 					 */ | ||||||
|  | 	u8	ecn_flags;	/* ECN status bits.			*/ | ||||||
|  | 	__cacheline_group_end(tcp_sock_write_tx); | ||||||
|  | 
 | ||||||
|  | 	/* TXRX read-write hotpath cache lines */ | ||||||
|  | 	__cacheline_group_begin(tcp_sock_write_txrx); | ||||||
|  | /*
 | ||||||
|  |  *	Header prediction flags | ||||||
|  |  *	0x5?10 << 16 + snd_wnd in net byte order | ||||||
|  |  */ | ||||||
|  | 	__be32	pred_flags; | ||||||
|  | 	u32	rcv_nxt;	/* What we want to receive next		*/ | ||||||
|  | 	u32	snd_nxt;	/* Next sequence we send		*/ | ||||||
|  | 	u32	snd_una;	/* First byte we want an ack for	*/ | ||||||
|  | 	u32	window_clamp;	/* Maximal window to advertise		*/ | ||||||
|  | 	u32	srtt_us;	/* smoothed round trip time << 3 in usecs */ | ||||||
|  | 	u32	packets_out;	/* Packets which are "in flight"	*/ | ||||||
|  | 	u32	snd_up;		/* Urgent pointer		*/ | ||||||
|  | 	u32	delivered;	/* Total data packets delivered incl. rexmits */ | ||||||
|  | 	u32	delivered_ce;	/* Like the above but only ECE marked packets */ | ||||||
|  | 	u32	app_limited;	/* limited until "delivered" reaches this val */ | ||||||
|  | 	u32	rcv_wnd;	/* Current receiver window		*/ | ||||||
|  | /*
 | ||||||
|  |  *      Options received (usually on last packet, some only on SYN packets). | ||||||
|  |  */ | ||||||
|  | 	struct tcp_options_received rx_opt; | ||||||
|  | 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */ | ||||||
|  | 		rate_app_limited:1;  /* rate_{delivered,interval_us} limited? */ | ||||||
|  | 	__cacheline_group_end(tcp_sock_write_txrx); | ||||||
|  | 
 | ||||||
|  | 	/* RX read-write hotpath cache lines */ | ||||||
|  | 	__cacheline_group_begin(tcp_sock_write_rx); | ||||||
|  | 	u64	bytes_received; | ||||||
|  | 				/* RFC4898 tcpEStatsAppHCThruOctetsReceived
 | ||||||
|  | 				 * sum(delta(rcv_nxt)), or how many bytes | ||||||
|  | 				 * were acked. | ||||||
|  | 				 */ | ||||||
|  | 	u32	segs_in;	/* RFC4898 tcpEStatsPerfSegsIn
 | ||||||
|  | 				 * total number of segments in. | ||||||
|  | 				 */ | ||||||
|  | 	u32	data_segs_in;	/* RFC4898 tcpEStatsPerfDataSegsIn
 | ||||||
|  | 				 * total number of data segments in. | ||||||
|  | 				 */ | ||||||
|  | 	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/ | ||||||
|  | 	u32	max_packets_out;  /* max packets_out in last window */ | ||||||
|  | 	u32	cwnd_usage_seq;  /* right edge of cwnd usage tracking flight */ | ||||||
|  | 	u32	rate_delivered;    /* saved rate sample: packets delivered */ | ||||||
|  | 	u32	rate_interval_us;  /* saved rate sample: time elapsed */ | ||||||
|  | 	u32	rcv_rtt_last_tsecr; | ||||||
|  | 	u64	first_tx_mstamp;  /* start of window send phase */ | ||||||
|  | 	u64	delivered_mstamp; /* time we reached "delivered" */ | ||||||
| 	u64	bytes_acked;	/* RFC4898 tcpEStatsAppHCThruOctetsAcked
 | 	u64	bytes_acked;	/* RFC4898 tcpEStatsAppHCThruOctetsAcked
 | ||||||
| 				 * sum(delta(snd_una)), or how many bytes | 				 * sum(delta(snd_una)), or how many bytes | ||||||
| 				 * were acked. | 				 * were acked. | ||||||
| 				 */ | 				 */ | ||||||
|  | 	struct { | ||||||
|  | 		u32	rtt_us; | ||||||
|  | 		u32	seq; | ||||||
|  | 		u64	time; | ||||||
|  | 	} rcv_rtt_est; | ||||||
|  | /* Receiver queue space */ | ||||||
|  | 	struct { | ||||||
|  | 		u32	space; | ||||||
|  | 		u32	seq; | ||||||
|  | 		u64	time; | ||||||
|  | 	} rcvq_space; | ||||||
|  | 	__cacheline_group_end(tcp_sock_write_rx); | ||||||
|  | 	/* End of Hot Path */ | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  *	RFC793 variables by their proper names. This means you can | ||||||
|  |  *	read the code and the spec side by side (and laugh ...) | ||||||
|  |  *	See RFC793 and RFC1122. The RFC writes these in capitals. | ||||||
|  |  */ | ||||||
| 	u32	dsack_dups;	/* RFC4898 tcpEStatsStackDSACKDups
 | 	u32	dsack_dups;	/* RFC4898 tcpEStatsStackDSACKDups
 | ||||||
| 				 * total number of DSACK blocks received | 				 * total number of DSACK blocks received | ||||||
| 				 */ | 				 */ | ||||||
|  	u32	snd_una;	/* First byte we want an ack for	*/ |  | ||||||
|  	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */ |  | ||||||
| 	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */ |  | ||||||
| 	u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */ |  | ||||||
| 	u32	last_oow_ack_time;  /* timestamp of last out-of-window ACK */ | 	u32	last_oow_ack_time;  /* timestamp of last out-of-window ACK */ | ||||||
| 	u32	compressed_ack_rcv_nxt; | 	u32	compressed_ack_rcv_nxt; | ||||||
| 
 |  | ||||||
| 	u32	tsoffset;	/* timestamp offset */ |  | ||||||
| 
 |  | ||||||
| 	struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ | 	struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ | ||||||
| 	struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ |  | ||||||
| 
 | 
 | ||||||
| 	u32	snd_wl1;	/* Sequence for window update		*/ |  | ||||||
| 	u32	snd_wnd;	/* The window we expect to receive	*/ |  | ||||||
| 	u32	max_window;	/* Maximal window ever seen from peer	*/ |  | ||||||
| 	u32	mss_cache;	/* Cached effective mss, not including SACKS */ |  | ||||||
| 
 |  | ||||||
| 	u32	window_clamp;	/* Maximal window to advertise		*/ |  | ||||||
| 	u32	rcv_ssthresh;	/* Current window clamp			*/ |  | ||||||
| 	u8	scaling_ratio;	/* see tcp_win_from_space() */ | 	u8	scaling_ratio;	/* see tcp_win_from_space() */ | ||||||
| 	/* Information of the most recently (s)acked skb */ | 	/* Information of the most recently (s)acked skb */ | ||||||
| 	struct tcp_rack { | 	struct tcp_rack { | ||||||
|  | @ -272,24 +369,16 @@ struct tcp_sock { | ||||||
| 		   dsack_seen:1, /* Whether DSACK seen after last adj */ | 		   dsack_seen:1, /* Whether DSACK seen after last adj */ | ||||||
| 		   advanced:1;	 /* mstamp advanced since last lost marking */ | 		   advanced:1;	 /* mstamp advanced since last lost marking */ | ||||||
| 	} rack; | 	} rack; | ||||||
| 	u16	advmss;		/* Advertised MSS			*/ |  | ||||||
| 	u8	compressed_ack; | 	u8	compressed_ack; | ||||||
| 	u8	dup_ack_counter:2, | 	u8	dup_ack_counter:2, | ||||||
| 		tlp_retrans:1,	/* TLP is a retransmission */ | 		tlp_retrans:1,	/* TLP is a retransmission */ | ||||||
| 		tcp_usec_ts:1, /* TSval values in usec */ | 		tcp_usec_ts:1, /* TSval values in usec */ | ||||||
| 		unused:4; | 		unused:4; | ||||||
| 	u32	chrono_start;	/* Start time in jiffies of a TCP chrono */ | 	u8	thin_lto    : 1,/* Use linear timeouts for thin streams */ | ||||||
| 	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */ | 		recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ | ||||||
| 	u8	chrono_type:2,	/* current chronograph type */ |  | ||||||
| 		rate_app_limited:1,  /* rate_{delivered,interval_us} limited? */ |  | ||||||
| 		fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ | 		fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ | ||||||
| 		fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ | 		fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ | ||||||
| 		is_sack_reneg:1,    /* in recovery from loss with SACK reneg? */ | 		fastopen_client_fail:2, /* reason why fastopen failed */ | ||||||
| 		fastopen_client_fail:2; /* reason why fastopen failed */ |  | ||||||
| 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */ |  | ||||||
| 		thin_lto    : 1,/* Use linear timeouts for thin streams */ |  | ||||||
| 		recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ |  | ||||||
| 		repair      : 1, |  | ||||||
| 		frto        : 1;/* F-RTO (RFC5682) activated in CA_Loss */ | 		frto        : 1;/* F-RTO (RFC5682) activated in CA_Loss */ | ||||||
| 	u8	repair_queue; | 	u8	repair_queue; | ||||||
| 	u8	save_syn:2,	/* Save headers of SYN packet */ | 	u8	save_syn:2,	/* Save headers of SYN packet */ | ||||||
|  | @ -297,45 +386,19 @@ struct tcp_sock { | ||||||
| 		syn_fastopen:1,	/* SYN includes Fast Open option */ | 		syn_fastopen:1,	/* SYN includes Fast Open option */ | ||||||
| 		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ | 		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ | ||||||
| 		syn_fastopen_ch:1, /* Active TFO re-enabling probe */ | 		syn_fastopen_ch:1, /* Active TFO re-enabling probe */ | ||||||
| 		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ | 		syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ | ||||||
| 		is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ |  | ||||||
| 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP */ |  | ||||||
| 
 | 
 | ||||||
| 	u32	tcp_tx_delay;	/* delay (in usec) added to TX packets */ | 	u32	tcp_tx_delay;	/* delay (in usec) added to TX packets */ | ||||||
| 	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */ |  | ||||||
| 	u64	tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ |  | ||||||
| 
 | 
 | ||||||
| /* RTT measurement */ | /* RTT measurement */ | ||||||
| 	u64	tcp_mstamp;	/* most recent packet received/sent */ |  | ||||||
| 	u32	srtt_us;	/* smoothed round trip time << 3 in usecs */ |  | ||||||
| 	u32	mdev_us;	/* medium deviation			*/ |  | ||||||
| 	u32	mdev_max_us;	/* maximal mdev for the last rtt period	*/ | 	u32	mdev_max_us;	/* maximal mdev for the last rtt period	*/ | ||||||
| 	u32	rttvar_us;	/* smoothed mdev_max			*/ |  | ||||||
| 	u32	rtt_seq;	/* sequence number to update rttvar	*/ |  | ||||||
| 	struct  minmax rtt_min; |  | ||||||
| 
 | 
 | ||||||
| 	u32	packets_out;	/* Packets which are "in flight"	*/ |  | ||||||
| 	u32	retrans_out;	/* Retransmitted packets out		*/ |  | ||||||
| 	u32	max_packets_out;  /* max packets_out in last window */ |  | ||||||
| 	u32	cwnd_usage_seq;  /* right edge of cwnd usage tracking flight */ |  | ||||||
| 
 |  | ||||||
| 	u16	urg_data;	/* Saved octet of OOB data and control flags */ |  | ||||||
| 	u8	ecn_flags;	/* ECN status bits.			*/ |  | ||||||
| 	u8	keepalive_probes; /* num of allowed keep alive probes	*/ | 	u8	keepalive_probes; /* num of allowed keep alive probes	*/ | ||||||
| 	u32	reordering;	/* Packet reordering metric.		*/ |  | ||||||
| 	u32	reord_seen;	/* number of data packet reordering events */ | 	u32	reord_seen;	/* number of data packet reordering events */ | ||||||
| 	u32	snd_up;		/* Urgent pointer		*/ |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  *      Options received (usually on last packet, some only on SYN packets). |  | ||||||
|  */ |  | ||||||
| 	struct tcp_options_received rx_opt; |  | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  *	Slow start and congestion control (see also Nagle, and Karn & Partridge) |  *	Slow start and congestion control (see also Nagle, and Karn & Partridge) | ||||||
|  */ |  */ | ||||||
|  	u32	snd_ssthresh;	/* Slow start size threshold		*/ |  | ||||||
|  	u32	snd_cwnd;	/* Sending congestion window		*/ |  | ||||||
| 	u32	snd_cwnd_cnt;	/* Linear increase counter		*/ | 	u32	snd_cwnd_cnt;	/* Linear increase counter		*/ | ||||||
| 	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ | 	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ | ||||||
| 	u32	snd_cwnd_used; | 	u32	snd_cwnd_used; | ||||||
|  | @ -343,32 +406,10 @@ struct tcp_sock { | ||||||
| 	u32	prior_cwnd;	/* cwnd right before starting loss recovery */ | 	u32	prior_cwnd;	/* cwnd right before starting loss recovery */ | ||||||
| 	u32	prr_delivered;	/* Number of newly delivered packets to
 | 	u32	prr_delivered;	/* Number of newly delivered packets to
 | ||||||
| 				 * receiver in Recovery. */ | 				 * receiver in Recovery. */ | ||||||
| 	u32	prr_out;	/* Total number of pkts sent during Recovery. */ |  | ||||||
| 	u32	delivered;	/* Total data packets delivered incl. rexmits */ |  | ||||||
| 	u32	delivered_ce;	/* Like the above but only ECE marked packets */ |  | ||||||
| 	u32	lost;		/* Total data packets lost incl. rexmits */ |  | ||||||
| 	u32	app_limited;	/* limited until "delivered" reaches this val */ |  | ||||||
| 	u64	first_tx_mstamp;  /* start of window send phase */ |  | ||||||
| 	u64	delivered_mstamp; /* time we reached "delivered" */ |  | ||||||
| 	u32	rate_delivered;    /* saved rate sample: packets delivered */ |  | ||||||
| 	u32	rate_interval_us;  /* saved rate sample: time elapsed */ |  | ||||||
| 
 |  | ||||||
|  	u32	rcv_wnd;	/* Current receiver window		*/ |  | ||||||
| 	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */ |  | ||||||
| 	u32	notsent_lowat;	/* TCP_NOTSENT_LOWAT */ |  | ||||||
| 	u32	pushed_seq;	/* Last pushed seq, required to talk to windows */ |  | ||||||
| 	u32	lost_out;	/* Lost packets			*/ |  | ||||||
| 	u32	sacked_out;	/* SACK'd packets			*/ |  | ||||||
| 
 | 
 | ||||||
| 	struct hrtimer	pacing_timer; | 	struct hrtimer	pacing_timer; | ||||||
| 	struct hrtimer	compressed_ack_timer; | 	struct hrtimer	compressed_ack_timer; | ||||||
| 
 | 
 | ||||||
| 	/* from STCP, retrans queue hinting */ |  | ||||||
| 	struct sk_buff* lost_skb_hint; |  | ||||||
| 	struct sk_buff *retransmit_skb_hint; |  | ||||||
| 
 |  | ||||||
| 	/* OOO segments go in this rbtree. Socket lock must be held. */ |  | ||||||
| 	struct rb_root	out_of_order_queue; |  | ||||||
| 	struct sk_buff	*ooo_last_skb; /* cache rb_last(out_of_order_queue) */ | 	struct sk_buff	*ooo_last_skb; /* cache rb_last(out_of_order_queue) */ | ||||||
| 
 | 
 | ||||||
| 	/* SACKs data, these 2 need to be together (see tcp_options_write) */ | 	/* SACKs data, these 2 need to be together (see tcp_options_write) */ | ||||||
|  | @ -377,12 +418,6 @@ struct tcp_sock { | ||||||
| 
 | 
 | ||||||
| 	struct tcp_sack_block recv_sack_cache[4]; | 	struct tcp_sack_block recv_sack_cache[4]; | ||||||
| 
 | 
 | ||||||
| 	struct sk_buff *highest_sack;   /* skb just after the highest
 |  | ||||||
| 					 * skb with SACKed bit set |  | ||||||
| 					 * (validity guaranteed only if |  | ||||||
| 					 * sacked_out > 0) |  | ||||||
| 					 */ |  | ||||||
| 
 |  | ||||||
| 	int     lost_cnt_hint; | 	int     lost_cnt_hint; | ||||||
| 
 | 
 | ||||||
| 	u32	prior_ssthresh; /* ssthresh saved at recovery start	*/ | 	u32	prior_ssthresh; /* ssthresh saved at recovery start	*/ | ||||||
|  | @ -433,21 +468,6 @@ struct tcp_sock { | ||||||
| 
 | 
 | ||||||
| 	u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */ | 	u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */ | ||||||
| 
 | 
 | ||||||
| /* Receiver side RTT estimation */ |  | ||||||
| 	u32 rcv_rtt_last_tsecr; |  | ||||||
| 	struct { |  | ||||||
| 		u32	rtt_us; |  | ||||||
| 		u32	seq; |  | ||||||
| 		u64	time; |  | ||||||
| 	} rcv_rtt_est; |  | ||||||
| 
 |  | ||||||
| /* Receiver queue space */ |  | ||||||
| 	struct { |  | ||||||
| 		u32	space; |  | ||||||
| 		u32	seq; |  | ||||||
| 		u64	time; |  | ||||||
| 	} rcvq_space; |  | ||||||
| 
 |  | ||||||
| /* TCP-specific MTU probe information. */ | /* TCP-specific MTU probe information. */ | ||||||
| 	struct { | 	struct { | ||||||
| 		u32		  probe_seq_start; | 		u32		  probe_seq_start; | ||||||
|  |  | ||||||
|  | @ -4564,6 +4564,97 @@ static void __init tcp_init_mem(void) | ||||||
| 	sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;	/* 9.37 % */ | 	sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;	/* 9.37 % */ | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static void __init tcp_struct_check(void) | ||||||
|  | { | ||||||
|  | 	/* TX read-mostly hotpath cache lines */ | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, max_window); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, rcv_ssthresh); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, reordering); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, notsent_lowat); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, gso_segs); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, lost_skb_hint); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, retransmit_skb_hint); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 40); | ||||||
|  | 
 | ||||||
|  | 	/* TXRX read-mostly hotpath cache lines */ | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, tsoffset); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, snd_wnd); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, mss_cache); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, snd_cwnd); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, prr_out); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 31); | ||||||
|  | 
 | ||||||
|  | 	/* RX read-mostly hotpath cache lines */ | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rcv_tstamp); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_wl1); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tlp_high_seq); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rttvar_us); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, retrans_out); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, advmss); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, urg_data); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, lost); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rtt_min); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, out_of_order_queue); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_ssthresh); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 69); | ||||||
|  | 
 | ||||||
|  | 	/* TX read-write hotpath cache lines */ | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, segs_out); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, data_segs_out); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, bytes_sent); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, snd_sml); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, chrono_start); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, chrono_stat); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, write_seq); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, pushed_seq); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, lsndtime); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, mdev_us); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_wstamp_ns); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_clock_cache); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_mstamp); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, rtt_seq); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 113); | ||||||
|  | 
 | ||||||
|  | 	/* TXRX read-write hotpath cache lines */ | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_nxt); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_nxt); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_una); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, window_clamp); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, srtt_us); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, packets_out); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_up); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered_ce); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 76); | ||||||
|  | 
 | ||||||
|  | 	/* RX read-write hotpath cache lines */ | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, segs_in); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, data_segs_in); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_wup); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, max_packets_out); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, cwnd_usage_seq); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_delivered); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_interval_us); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_last_tsecr); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, first_tx_mstamp); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_mstamp); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_est); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcvq_space); | ||||||
|  | 	CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_rx, 99); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void __init tcp_init(void) | void __init tcp_init(void) | ||||||
| { | { | ||||||
| 	int max_rshare, max_wshare, cnt; | 	int max_rshare, max_wshare, cnt; | ||||||
|  | @ -4574,6 +4665,8 @@ void __init tcp_init(void) | ||||||
| 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > | 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > | ||||||
| 		     sizeof_field(struct sk_buff, cb)); | 		     sizeof_field(struct sk_buff, cb)); | ||||||
| 
 | 
 | ||||||
|  | 	tcp_struct_check(); | ||||||
|  | 
 | ||||||
| 	percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); | 	percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); | ||||||
| 
 | 
 | ||||||
| 	timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE); | 	timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE); | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Coco Li
						Coco Li