	tcp: TCP Fast Open Server - main code path
This patch adds the main processing path to complete the TFO server patches.

A TFO request (i.e., a SYN + data packet with a TFO cookie option) first gets processed in tcp_v4_conn_request(). If it passes the various TFO checks performed by tcp_fastopen_check(), a child socket will be created right away to be accepted by applications, rather than waiting for the 3WHS to finish.

In addition to the use of the TFO cookie, a simple max_qlen based scheme is put in place to fend off spoofed TFO attacks.

When a valid ACK comes back to tcp_rcv_state_process(), it will cause the state of the child socket to switch from TCP_SYN_RECV to TCP_ESTABLISHED, or from TCP_FIN_WAIT1 to TCP_FIN_WAIT2. At this time retransmission will resume for any unack'ed (data, FIN, ...) segments.

Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
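For context on how the new path is exercised, here is a minimal userspace sketch (not part of this commit). The server half of TFO is gated by the TFO_SERVER_ENABLE bit (0x2) of the net.ipv4.tcp_fastopen sysctl, and each listener opts in via the TCP_FASTOPEN socket option added earlier in this series; the option argument becomes the max_qlen limit that the anti-spoofing check below enforces. The port and the missing error handling are illustrative only.

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef TCP_FASTOPEN
#define TCP_FASTOPEN 23   /* from this patch series; older headers lack it */
#endif

int main(void)
{
        int lfd = socket(AF_INET, SOCK_STREAM, 0);
        struct sockaddr_in addr = {
                .sin_family = AF_INET,
                .sin_port   = htons(8080),      /* illustrative port */
        };
        /* Cap on TFO children whose 3WHS hasn't completed yet; this is
         * the max_qlen that the spoofing defense in this patch checks. */
        int qlen = 16;
        char buf[512];
        ssize_t n;
        int cfd;

        bind(lfd, (struct sockaddr *)&addr, sizeof(addr));
        setsockopt(lfd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));
        listen(lfd, 128);

        /* With this patch, accept() can hand back a child created directly
         * from a SYN+data packet carrying a valid cookie, before the
         * handshake completes; read() then returns the SYN payload. */
        cfd = accept(lfd, NULL, NULL);
        n = read(cfd, buf, sizeof(buf));
        if (n > 0)
                fprintf(stderr, "%zd bytes from the SYN payload\n", n);
        close(cfd);
        close(lfd);
        return 0;
}

The kernel-side counterpart of the setsockopt()/accept() steps is the tcp_fastopen_check()/tcp_v4_conn_req_fastopen() pair added in the diff below.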
This commit is contained in:
parent 8336886f78
commit 168a8f5805

2 changed files with 309 additions and 27 deletions
net/ipv4/tcp_input.c

@@ -3127,6 +3127,12 @@ void tcp_rearm_rto(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
 
+       /* If the retrans timer is currently being used by Fast Open
+        * for SYN-ACK retrans purpose, stay put.
+        */
+       if (tp->fastopen_rsk)
+               return;
+
        if (!tp->packets_out) {
                inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
        } else {
@@ -5895,7 +5901,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                tcp_send_synack(sk);
 #if 0
                /* Note, we could accept data and URG from this segment.
-                * There are no obstacles to make this.
+                * There are no obstacles to make this (except that we must
+                * either change tcp_recvmsg() to prevent it from returning data
+                * before 3WHS completes per RFC793, or employ TCP Fast Open).
                 *
                 * However, if we ignore data in ACKless segments sometimes,
                 * we have no reasons to accept it sometimes.
@@ -5935,6 +5943,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
+       struct request_sock *req;
        int queued = 0;
 
        tp->rx_opt.saw_tstamp = 0;
@@ -5990,7 +5999,14 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                return 0;
        }
 
-       if (!tcp_validate_incoming(sk, skb, th, 0))
+       req = tp->fastopen_rsk;
+       if (req != NULL) {
+               BUG_ON(sk->sk_state != TCP_SYN_RECV &&
+                   sk->sk_state != TCP_FIN_WAIT1);
+
+               if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
+                       goto discard;
+       } else if (!tcp_validate_incoming(sk, skb, th, 0))
                return 0;
 
        /* step 5: check the ACK field */
@@ -6000,7 +6016,22 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                switch (sk->sk_state) {
                case TCP_SYN_RECV:
                        if (acceptable) {
-                               tp->copied_seq = tp->rcv_nxt;
+                               /* Once we leave TCP_SYN_RECV, we no longer
+                                * need req so release it.
+                                */
+                               if (req) {
+                                       reqsk_fastopen_remove(sk, req, false);
+                               } else {
+                                       /* Make sure socket is routed, for
+                                        * correct metrics.
+                                        */
+                                       icsk->icsk_af_ops->rebuild_header(sk);
+                                       tcp_init_congestion_control(sk);
+
+                                       tcp_mtup_init(sk);
+                                       tcp_init_buffer_space(sk);
+                                       tp->copied_seq = tp->rcv_nxt;
+                               }
                                smp_mb();
                                tcp_set_state(sk, TCP_ESTABLISHED);
                                sk->sk_state_change(sk);
@@ -6022,23 +6053,27 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                                if (tp->rx_opt.tstamp_ok)
                                        tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
 
-                               /* Make sure socket is routed, for
-                                * correct metrics.
-                                */
-                               icsk->icsk_af_ops->rebuild_header(sk);
-
-                               tcp_init_metrics(sk);
-
-                               tcp_init_congestion_control(sk);
+                               if (req) {
+                                       /* Re-arm the timer because data may
+                                        * have been sent out. This is similar
+                                        * to the regular data transmission case
+                                        * when new data has just been ack'ed.
+                                        *
+                                        * (TFO) - we could try to be more
+                                        * aggressive and retranmitting any data
+                                        * sooner based on when they were sent
+                                        * out.
+                                        */
+                                       tcp_rearm_rto(sk);
+                               } else
+                                       tcp_init_metrics(sk);
 
                                /* Prevent spurious tcp_cwnd_restart() on
                                 * first data packet.
                                 */
                                tp->lsndtime = tcp_time_stamp;
 
-                               tcp_mtup_init(sk);
                                tcp_initialize_rcv_mss(sk);
-                               tcp_init_buffer_space(sk);
                                tcp_fast_path_on(tp);
                        } else {
                                return 1;
@@ -6046,6 +6081,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                        break;
 
                case TCP_FIN_WAIT1:
+                       /* If we enter the TCP_FIN_WAIT1 state and we are a
+                        * Fast Open socket and this is the first acceptable
+                        * ACK we have received, this would have acknowledged
+                        * our SYNACK so stop the SYNACK timer.
+                        */
+                       if (acceptable && req != NULL) {
+                               /* We no longer need the request sock. */
+                               reqsk_fastopen_remove(sk, req, false);
+                               tcp_rearm_rto(sk);
+                       }
                        if (tp->snd_una == tp->write_seq) {
                                struct dst_entry *dst;
 
net/ipv4/tcp_ipv4.c

@@ -352,6 +352,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
        const int code = icmp_hdr(icmp_skb)->code;
        struct sock *sk;
        struct sk_buff *skb;
+       struct request_sock *req;
        __u32 seq;
        __u32 remaining;
        int err;
@@ -394,9 +395,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
        icsk = inet_csk(sk);
        tp = tcp_sk(sk);
+       req = tp->fastopen_rsk;
        seq = ntohl(th->seq);
        if (sk->sk_state != TCP_LISTEN &&
-           !between(seq, tp->snd_una, tp->snd_nxt)) {
+           !between(seq, tp->snd_una, tp->snd_nxt) &&
+           (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
+               /* For a Fast Open socket, allow seq to be snt_isn. */
                NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }
@@ -435,6 +439,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
                    !icsk->icsk_backoff)
                        break;
 
+               /* XXX (TFO) - revisit the following logic for TFO */
+
                if (sock_owned_by_user(sk))
                        break;
 
@@ -466,6 +472,14 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
                goto out;
        }
 
+       /* XXX (TFO) - if it's a TFO socket and has been accepted, rather
+        * than following the TCP_SYN_RECV case and closing the socket,
+        * we ignore the ICMP error and keep trying like a fully established
+        * socket. Is this the right thing to do?
+        */
+       if (req && req->sk == NULL)
+               goto out;
+
        switch (sk->sk_state) {
                struct request_sock *req, **prev;
        case TCP_LISTEN:
@@ -498,7 +512,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:  /* Cannot happen.
-                              It can f.e. if SYNs crossed.
+                              It can f.e. if SYNs crossed,
+                              or Fast Open.
                             */
                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;
@@ -809,8 +824,12 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
 {
-       tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
-                       tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
+       /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+        * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+        */
+       tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
+                       tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+                       tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
                        req->ts_recent,
                        0,
                        tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
@@ -1272,6 +1291,178 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
 };
 #endif
 
+static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
+                              struct request_sock *req,
+                              struct tcp_fastopen_cookie *foc,
+                              struct tcp_fastopen_cookie *valid_foc)
+{
+       bool skip_cookie = false;
+       struct fastopen_queue *fastopenq;
+
+       if (likely(!fastopen_cookie_present(foc))) {
+               /* See include/net/tcp.h for the meaning of these knobs */
+               if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
+                   ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
+                   (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
+                       skip_cookie = true; /* no cookie to validate */
+               else
+                       return false;
+       }
+       fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
+       /* A FO option is present; bump the counter. */
+       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
+
+       /* Make sure the listener has enabled fastopen, and we don't
+        * exceed the max # of pending TFO requests allowed before trying
+        * to validating the cookie in order to avoid burning CPU cycles
+        * unnecessarily.
+        *
+        * XXX (TFO) - The implication of checking the max_qlen before
+        * processing a cookie request is that clients can't differentiate
+        * between qlen overflow causing Fast Open to be disabled
+        * temporarily vs a server not supporting Fast Open at all.
+        */
+       if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
+           fastopenq == NULL || fastopenq->max_qlen == 0)
+               return false;
+
+       if (fastopenq->qlen >= fastopenq->max_qlen) {
+               struct request_sock *req1;
+               spin_lock(&fastopenq->lock);
+               req1 = fastopenq->rskq_rst_head;
+               if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
+                       spin_unlock(&fastopenq->lock);
+                       NET_INC_STATS_BH(sock_net(sk),
+                           LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
+                       /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
+                       foc->len = -1;
+                       return false;
+               }
+               fastopenq->rskq_rst_head = req1->dl_next;
+               fastopenq->qlen--;
+               spin_unlock(&fastopenq->lock);
+               reqsk_free(req1);
+       }
+       if (skip_cookie) {
+               tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+               return true;
+       }
+       if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
+               if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
+                       tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+                       if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
+                           memcmp(&foc->val[0], &valid_foc->val[0],
+                           TCP_FASTOPEN_COOKIE_SIZE) != 0)
+                               return false;
+                       valid_foc->len = -1;
+               }
+               /* Acknowledge the data received from the peer. */
+               tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+               return true;
+       } else if (foc->len == 0) { /* Client requesting a cookie */
+               tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+               NET_INC_STATS_BH(sock_net(sk),
+                   LINUX_MIB_TCPFASTOPENCOOKIEREQD);
+       } else {
+               /* Client sent a cookie with wrong size. Treat it
+                * the same as invalid and return a valid one.
+                */
+               tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+       }
+       return false;
+}
+
+static int tcp_v4_conn_req_fastopen(struct sock *sk,
+                                   struct sk_buff *skb,
+                                   struct sk_buff *skb_synack,
+                                   struct request_sock *req,
+                                   struct request_values *rvp)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+       const struct inet_request_sock *ireq = inet_rsk(req);
+       struct sock *child;
+
+       req->retrans = 0;
+       req->sk = NULL;
+
+       child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+       if (child == NULL) {
+               NET_INC_STATS_BH(sock_net(sk),
+                                LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+               kfree_skb(skb_synack);
+               return -1;
+       }
+       ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
+                       ireq->rmt_addr, ireq->opt);
+       /* XXX (TFO) - is it ok to ignore error and continue? */
+
+       spin_lock(&queue->fastopenq->lock);
+       queue->fastopenq->qlen++;
+       spin_unlock(&queue->fastopenq->lock);
+
+       /* Initialize the child socket. Have to fix some values to take
+        * into account the child is a Fast Open socket and is created
+        * only out of the bits carried in the SYN packet.
+        */
+       tp = tcp_sk(child);
+
+       tp->fastopen_rsk = req;
+       /* Do a hold on the listner sk so that if the listener is being
+        * closed, the child that has been accepted can live on and still
+        * access listen_lock.
+        */
+       sock_hold(sk);
+       tcp_rsk(req)->listener = sk;
+
+       /* RFC1323: The window in SYN & SYN/ACK segments is never
+        * scaled. So correct it appropriately.
+        */
+       tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
+
+       /* Activate the retrans timer so that SYNACK can be retransmitted.
+        * The request socket is not added to the SYN table of the parent
+        * because it's been added to the accept queue directly.
+        */
+       inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
+           TCP_TIMEOUT_INIT, TCP_RTO_MAX);
+
+       /* Add the child socket directly into the accept queue */
+       inet_csk_reqsk_queue_add(sk, req, child);
+
+       /* Now finish processing the fastopen child socket. */
+       inet_csk(child)->icsk_af_ops->rebuild_header(child);
+       tcp_init_congestion_control(child);
+       tcp_mtup_init(child);
+       tcp_init_buffer_space(child);
+       tcp_init_metrics(child);
+
+       /* Queue the data carried in the SYN packet. We need to first
+        * bump skb's refcnt because the caller will attempt to free it.
+        *
+        * XXX (TFO) - we honor a zero-payload TFO request for now.
+        * (Any reason not to?)
+        */
+       if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
+               /* Don't queue the skb if there is no payload in SYN.
+                * XXX (TFO) - How about SYN+FIN?
+                */
+               tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+       } else {
+               skb = skb_get(skb);
+               skb_dst_drop(skb);
+               __skb_pull(skb, tcp_hdr(skb)->doff * 4);
+               skb_set_owner_r(skb, child);
+               __skb_queue_tail(&child->sk_receive_queue, skb);
+               tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+       }
+       sk->sk_data_ready(sk, 0);
+       bh_unlock_sock(child);
+       sock_put(child);
+       WARN_ON(req->sk == NULL);
+       return 0;
+}
+
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
        struct tcp_extend_values tmp_ext;
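Stripped of kernel plumbing, the admission decision that tcp_fastopen_check() above makes can be modeled as below. This is a simplified standalone sketch: the struct and the tfo_admit() helper are invented for illustration, the flag values are placeholders (the real knobs live in include/net/tcp.h), and the TFO_SERVER_ALWAYS and cookie-recycling details are elided. It preserves the three outcomes: fast open with cookie validation skipped, fast open after a cookie match, and fallback to a regular 3WHS.

#include <stdbool.h>
#include <string.h>

/* Placeholder values for the sketch; see include/net/tcp.h for the
 * real definitions. */
#define TFO_SERVER_ENABLE          0x2
#define TFO_SERVER_COOKIE_NOT_REQD 0x200
#define TFO_COOKIE_SIZE            8   /* TCP_FASTOPEN_COOKIE_SIZE */

struct tfo_syn {                       /* invented for this model */
        int  cookie_len;               /* -1 absent, 0 cookie request, 8 present */
        unsigned char cookie[TFO_COOKIE_SIZE];
        bool has_data;                 /* SYN carries a payload */
        int  qlen, max_qlen;           /* pending TFO children vs. limit */
};

static bool tfo_admit(const struct tfo_syn *s, int sysctl_tcp_fastopen,
                      const unsigned char *valid_cookie)
{
        bool skip_cookie = false;

        /* No cookie option at all: only proceed under a "cookie not
         * required" policy, and only if the SYN carries data. */
        if (s->cookie_len < 0) {
                if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
                    s->has_data)
                        skip_cookie = true;
                else
                        return false;
        }

        /* The listener must have TFO enabled, and max_qlen bounds how much
         * half-established state a spoofed-SYN flood can pin in memory. */
        if (!(sysctl_tcp_fastopen & TFO_SERVER_ENABLE) || s->max_qlen == 0)
                return false;
        if (s->qlen >= s->max_qlen)
                return false;   /* the kernel may first reap RST'd requests */

        if (skip_cookie)
                return true;

        /* cookie_len == 0 is "send me a cookie": it is answered in the
         * SYN-ACK, but no fast open this time. Otherwise the cookie
         * must match the one the server would generate for this peer. */
        return s->cookie_len == TFO_COOKIE_SIZE &&
               memcmp(s->cookie, valid_cookie, TFO_COOKIE_SIZE) == 0;
}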
@@ -1285,6 +1476,11 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        __be32 daddr = ip_hdr(skb)->daddr;
        __u32 isn = TCP_SKB_CB(skb)->when;
        bool want_cookie = false;
+       struct flowi4 fl4;
+       struct tcp_fastopen_cookie foc = { .len = -1 };
+       struct tcp_fastopen_cookie valid_foc = { .len = -1 };
+       struct sk_buff *skb_synack;
+       int do_fastopen;
 
        /* Never answer to SYNs send to broadcast or multicast */
        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1319,7 +1515,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        tcp_clear_options(&tmp_opt);
        tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
        tmp_opt.user_mss  = tp->rx_opt.user_mss;
-       tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
+       tcp_parse_options(skb, &tmp_opt, &hash_location, 0,
+           want_cookie ? NULL : &foc);
 
        if (tmp_opt.cookie_plus > 0 &&
            tmp_opt.saw_tstamp &&
@@ -1377,8 +1574,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
                isn = cookie_v4_init_sequence(sk, skb, &req->mss);
                req->cookie_ts = tmp_opt.tstamp_ok;
        } else if (!isn) {
-               struct flowi4 fl4;
-
                /* VJ's idea. We save last timestamp seen
                 * from the destination in peer table, when entering
                 * state TIME-WAIT, and check against it before
@@ -1419,14 +1614,52 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        tcp_rsk(req)->snt_isn = isn;
        tcp_rsk(req)->snt_synack = tcp_time_stamp;
 
-       if (tcp_v4_send_synack(sk, dst, req,
-                              (struct request_values *)&tmp_ext,
-                              skb_get_queue_mapping(skb),
-                              want_cookie) ||
-           want_cookie)
+       if (dst == NULL) {
+               dst = inet_csk_route_req(sk, &fl4, req);
+               if (dst == NULL)
+                       goto drop_and_free;
+       }
+       do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
+
+       /* We don't call tcp_v4_send_synack() directly because we need
+        * to make sure a child socket can be created successfully before
+        * sending back synack!
+        *
+        * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
+        * (or better yet, call tcp_send_synack() in the child context
+        * directly, but will have to fix bunch of other code first)
+        * after syn_recv_sock() except one will need to first fix the
+        * latter to remove its dependency on the current implementation
+        * of tcp_v4_send_synack()->tcp_select_initial_window().
+        */
+       skb_synack = tcp_make_synack(sk, dst, req,
+           (struct request_values *)&tmp_ext,
+           fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
+
+       if (skb_synack) {
+               __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
+               skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
+       } else
+               goto drop_and_free;
+
+       if (likely(!do_fastopen)) {
+               int err;
+               err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
+                    ireq->rmt_addr, ireq->opt);
+               err = net_xmit_eval(err);
+               if (err || want_cookie)
+                       goto drop_and_free;
+
+               tcp_rsk(req)->listener = NULL;
+               /* Add the request_sock to the SYN table */
+               inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+               if (fastopen_cookie_present(&foc) && foc.len != 0)
+                       NET_INC_STATS_BH(sock_net(sk),
+                           LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+       } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req,
+           (struct request_values *)&tmp_ext))
                goto drop_and_free;
 
-       inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
        return 0;
 
 drop_and_release:
@@ -1977,6 +2210,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
                         tcp_cookie_values_release);
                tp->cookie_values = NULL;
        }
+       BUG_ON(tp->fastopen_rsk != NULL);
 
        /* If socket is aborted during connect operation */
        tcp_free_fastopen_req(tp);
@@ -2425,6 +2659,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
        const struct tcp_sock *tp = tcp_sk(sk);
        const struct inet_connection_sock *icsk = inet_csk(sk);
        const struct inet_sock *inet = inet_sk(sk);
+       struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
        __be32 dest = inet->inet_daddr;
        __be32 src = inet->inet_rcv_saddr;
        __u16 destp = ntohs(inet->inet_dport);
@@ -2469,7 +2704,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
                jiffies_to_clock_t(icsk->icsk_ack.ato),
                (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
                tp->snd_cwnd,
-               tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
+               sk->sk_state == TCP_LISTEN ?
+                   (fastopenq ? fastopenq->max_qlen : 0) :
+                   (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh),
                len);
 }
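The client half of TFO, merged separately from this commit, is what produces the SYN+data+cookie packet the server path above consumes: instead of connect() followed by write(), the application calls sendto() with MSG_FASTOPEN. A minimal sketch, assuming a kernel with client-side TFO enabled (net.ipv4.tcp_fastopen bit 0x1) and a placeholder address and port:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN 0x20000000   /* may be missing from older libc headers */
#endif

int main(void)
{
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        struct sockaddr_in srv = { .sin_family = AF_INET,
                                   .sin_port = htons(8080) };
        static const char req[] = "hello in the SYN";

        inet_pton(AF_INET, "192.0.2.1", &srv.sin_addr);   /* placeholder */

        /* No connect(): sendto(MSG_FASTOPEN) performs the connect and,
         * if a cookie for this server is cached, carries the data in the
         * SYN. On first contact the kernel falls back to a regular 3WHS
         * and requests a cookie for next time. */
        sendto(fd, req, sizeof(req) - 1, MSG_FASTOPEN,
               (struct sockaddr *)&srv, sizeof(srv));

        close(fd);
        return 0;
}

On the first exchange the client's SYN carries an empty cookie option (the foc.len == 0 "client requesting a cookie" branch above); the saved cookie then lets every later connection to the same server deliver data one RTT earlier.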