forked from mirrors/linux
		
	ipv4: Use binary search to choose tcp PMTU probe_size
Current probe_size is chosen by doubling mss_cache, so the probing process ends early with a sub-optimal mss size and the link mtu is not taken full advantage of; in turn, this forces the user to tweak tcp_base_mss with care. Use binary search to choose probe_size in a fine-grained manner, so that an optimal mss is found to boost performance to its maximum. In addition, introduce a sysctl_tcp_probe_threshold to control when probing will stop with respect to the width of the search range. Test env: Docker instance with vxlan encapsulation (82599EB) iperf -c 10.0.0.24 -t 60 Before this patch: 1.26 Gbits/sec After this patch: increase 26% 1.59 Gbits/sec Signed-off-by: Fan Du <fan.du@intel.com> Acked-by: John Heffner <johnwheffner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									dcd8fb8533
								
							
						
					
					
						commit
						6b58e0a5f3
					
				
					 5 changed files with 23 additions and 3 deletions
				
			
		| 
						 | 
					@ -87,6 +87,7 @@ struct netns_ipv4 {
 | 
				
			||||||
	int sysctl_tcp_fwmark_accept;
 | 
						int sysctl_tcp_fwmark_accept;
 | 
				
			||||||
	int sysctl_tcp_mtu_probing;
 | 
						int sysctl_tcp_mtu_probing;
 | 
				
			||||||
	int sysctl_tcp_base_mss;
 | 
						int sysctl_tcp_base_mss;
 | 
				
			||||||
 | 
						int sysctl_tcp_probe_threshold;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	struct ping_group_range ping_group_range;
 | 
						struct ping_group_range ping_group_range;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -67,6 +67,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 | 
				
			||||||
/* The least MTU to use for probing */
 | 
					/* The least MTU to use for probing */
 | 
				
			||||||
#define TCP_BASE_MSS		1024
 | 
					#define TCP_BASE_MSS		1024
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Specify interval when tcp mtu probing will stop */
 | 
				
			||||||
 | 
					#define TCP_PROBE_THRESHOLD	8
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* After receiving this amount of duplicate ACKs fast retransmit starts. */
 | 
					/* After receiving this amount of duplicate ACKs fast retransmit starts. */
 | 
				
			||||||
#define TCP_FASTRETRANS_THRESH 3
 | 
					#define TCP_FASTRETRANS_THRESH 3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -883,6 +883,13 @@ static struct ctl_table ipv4_net_table[] = {
 | 
				
			||||||
		.mode		= 0644,
 | 
							.mode		= 0644,
 | 
				
			||||||
		.proc_handler	= proc_dointvec,
 | 
							.proc_handler	= proc_dointvec,
 | 
				
			||||||
	},
 | 
						},
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							.procname	= "tcp_probe_threshold",
 | 
				
			||||||
 | 
							.data		= &init_net.ipv4.sysctl_tcp_probe_threshold,
 | 
				
			||||||
 | 
							.maxlen		= sizeof(int),
 | 
				
			||||||
 | 
							.mode		= 0644,
 | 
				
			||||||
 | 
							.proc_handler	= proc_dointvec,
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
	{ }
 | 
						{ }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2460,6 +2460,7 @@ static int __net_init tcp_sk_init(struct net *net)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	net->ipv4.sysctl_tcp_ecn = 2;
 | 
						net->ipv4.sysctl_tcp_ecn = 2;
 | 
				
			||||||
	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
 | 
						net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
 | 
				
			||||||
 | 
						net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fail:
 | 
					fail:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1842,11 +1842,13 @@ static int tcp_mtu_probe(struct sock *sk)
 | 
				
			||||||
	struct tcp_sock *tp = tcp_sk(sk);
 | 
						struct tcp_sock *tp = tcp_sk(sk);
 | 
				
			||||||
	struct inet_connection_sock *icsk = inet_csk(sk);
 | 
						struct inet_connection_sock *icsk = inet_csk(sk);
 | 
				
			||||||
	struct sk_buff *skb, *nskb, *next;
 | 
						struct sk_buff *skb, *nskb, *next;
 | 
				
			||||||
 | 
						struct net *net = sock_net(sk);
 | 
				
			||||||
	int len;
 | 
						int len;
 | 
				
			||||||
	int probe_size;
 | 
						int probe_size;
 | 
				
			||||||
	int size_needed;
 | 
						int size_needed;
 | 
				
			||||||
	int copy;
 | 
						int copy;
 | 
				
			||||||
	int mss_now;
 | 
						int mss_now;
 | 
				
			||||||
 | 
						int interval;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Not currently probing/verifying,
 | 
						/* Not currently probing/verifying,
 | 
				
			||||||
	 * not in recovery,
 | 
						 * not in recovery,
 | 
				
			||||||
| 
						 | 
					@ -1859,11 +1861,17 @@ static int tcp_mtu_probe(struct sock *sk)
 | 
				
			||||||
	    tp->rx_opt.num_sacks || tp->rx_opt.dsack)
 | 
						    tp->rx_opt.num_sacks || tp->rx_opt.dsack)
 | 
				
			||||||
		return -1;
 | 
							return -1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Very simple search strategy: just double the MSS. */
 | 
						/* Use binary search for probe_size between tcp_mss_base,
 | 
				
			||||||
 | 
						 * and current mss_clamp. if (search_high - search_low)
 | 
				
			||||||
 | 
						 * smaller than a threshold, backoff from probing.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
	mss_now = tcp_current_mss(sk);
 | 
						mss_now = tcp_current_mss(sk);
 | 
				
			||||||
	probe_size = 2 * tp->mss_cache;
 | 
						probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high +
 | 
				
			||||||
 | 
									    icsk->icsk_mtup.search_low) >> 1);
 | 
				
			||||||
	size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
 | 
						size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
 | 
				
			||||||
	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
 | 
						interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
 | 
				
			||||||
 | 
						if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
 | 
				
			||||||
 | 
						    interval < max(1, net->ipv4.sysctl_tcp_probe_threshold)) {
 | 
				
			||||||
		/* TODO: set timer for probe_converge_event */
 | 
							/* TODO: set timer for probe_converge_event */
 | 
				
			||||||
		return -1;
 | 
							return -1;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue