tcp: TCP Fast Open Server - support TFO listeners

This patch builds on top of the previous patch to add the support
for TFO listeners. This includes:

1. allocating, properly initializing, and managing the per listener
   fastopen_queue structure when TFO is enabled

2. changes to the inet_csk_accept code to support TFO. E.g., the
   request_sock can no longer be freed upon accept(), not until 3WHS
   finishes

3. allowing a TCP_SYN_RECV socket to properly poll() and sendmsg()
   if it's a TFO socket

4. properly closing a TFO listener, and a TFO socket before 3WHS
   finishes

5. supporting TCP_FASTOPEN socket option

6. modifying tcp_check_req() to check a TFO socket as well as a
   request_sock

7. supporting TCP's TFO cookie option

8. adding a new SYN-ACK retransmit handler to use the timer directly
   off the TFO socket rather than the listener socket. Note that TFO
   server side will not retransmit anything other than SYN-ACK until
   the 3WHS is completed.

The patch also contains an important function "reqsk_fastopen_remove()"
to manage the somewhat complex relation between a listener, its
request_sock, and the corresponding child socket. See the comment
above the function for the detail.

Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 1046716368
commit 8336886f78
13 changed files with 329 additions and 49 deletions
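
Item 5 in the list above is the part applications see: a server enables Fast Open by setting the TCP_FASTOPEN socket option on a listener, with the option value sizing the per-listener fastopen_queue (fastopen_init_queue() in the net/ipv4/tcp.c hunk below). The sketch that follows is illustrative rather than part of the patch: it assumes a kernel carrying this series with the server bit of the net.ipv4.tcp_fastopen sysctl set, and the port, queue length, and echo behavior are arbitrary choices.

/* Minimal TFO listener sketch (userspace, illustrative). */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef TCP_FASTOPEN
#define TCP_FASTOPEN	23	/* value from the kernel's uapi headers */
#endif

int main(void)
{
	struct sockaddr_in addr;
	int qlen = 5;		/* backlog of pending TFO requests */
	int lfd = socket(AF_INET, SOCK_STREAM, 0);

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	addr.sin_port = htons(8080);	/* illustrative port */

	if (lfd < 0 || bind(lfd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;
	/* Item 5: sizes and allocates the per-listener fastopen_queue. */
	if (setsockopt(lfd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0)
		perror("setsockopt(TCP_FASTOPEN)");
	if (listen(lfd, 128) < 0)
		return 1;

	for (;;) {
		/* The accepted child may still be in TCP_SYN_RECV; data
		 * carried in the SYN is already readable, and writes are
		 * permitted before the 3WHS completes (items 2 and 3). */
		int cfd = accept(lfd, NULL, NULL);
		char buf[1024];
		ssize_t n;

		if (cfd < 0)
			continue;
		n = read(cfd, buf, sizeof(buf));
		if (n > 0)
			write(cfd, buf, n);	/* echo the request back */
		close(cfd);
	}
}

Because accept() can now return before the handshake finishes, the request_sock behind such a child is freed later by reqsk_fastopen_remove(), whose lifetime rules the diff below documents in detail.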
				
			
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -226,19 +226,6 @@ static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue
 	return req;
 }
 
-static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue,
-						 struct sock *parent)
-{
-	struct request_sock *req = reqsk_queue_remove(queue);
-	struct sock *child = req->sk;
-
-	WARN_ON(child == NULL);
-
-	sk_acceptq_removed(parent);
-	__reqsk_free(req);
-	return child;
-}
-
 static inline int reqsk_queue_removed(struct request_sock_queue *queue,
 				      struct request_sock *req)
 {
diff --git a/include/net/tcp.h b/include/net/tcp.h
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -424,7 +424,8 @@ extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *
 						     const struct tcphdr *th);
 extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
 				   struct request_sock *req,
-				   struct request_sock **prev);
+				   struct request_sock **prev,
+				   bool fastopen);
 extern int tcp_child_process(struct sock *parent, struct sock *child,
 			     struct sk_buff *skb);
 extern bool tcp_use_frto(struct sock *sk);
@@ -478,7 +479,8 @@ extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
 extern int tcp_connect(struct sock *sk);
 extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 					struct request_sock *req,
-					struct request_values *rvp);
+					struct request_values *rvp,
+					struct tcp_fastopen_cookie *foc);
 extern int tcp_disconnect(struct sock *sk, int flags);
 
 void tcp_connect_init(struct sock *sk);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -15,6 +15,7 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/tcp.h>
 #include <linux/vmalloc.h>
 
 #include <net/request_sock.h>
@@ -130,3 +131,97 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
 		kfree(lopt);
 }
 
+/*
+ * This function is called to set a Fast Open socket's "fastopen_rsk" field
+ * to NULL when a TFO socket no longer needs to access the request_sock.
+ * This happens only after 3WHS has been either completed or aborted (e.g.,
+ * RST is received).
+ *
+ * Before TFO, a child socket is created only after 3WHS is completed,
+ * hence it never needs to access the request_sock. things get a lot more
+ * complex with TFO. A child socket, accepted or not, has to access its
+ * request_sock for 3WHS processing, e.g., to retransmit SYN-ACK pkts,
+ * until 3WHS is either completed or aborted. Afterwards the req will stay
+ * until either the child socket is accepted, or in the rare case when the
+ * listener is closed before the child is accepted.
+ *
+ * In short, a request socket is only freed after BOTH 3WHS has completed
+ * (or aborted) and the child socket has been accepted (or listener closed).
+ * When a child socket is accepted, its corresponding req->sk is set to
+ * NULL since it's no longer needed. More importantly, "req->sk == NULL"
+ * will be used by the code below to determine if a child socket has been
+ * accepted or not, and the check is protected by the fastopenq->lock
+ * described below.
+ *
+ * Note that fastopen_rsk is only accessed from the child socket's context
+ * with its socket lock held. But a request_sock (req) can be accessed by
+ * both its child socket through fastopen_rsk, and a listener socket through
+ * icsk_accept_queue.rskq_accept_head. To protect the access a simple spin
+ * lock per listener "icsk->icsk_accept_queue.fastopenq->lock" is created.
+ * only in the rare case when both the listener and the child locks are held,
+ * e.g., in inet_csk_listen_stop() do we not need to acquire the lock.
+ * The lock also protects other fields such as fastopenq->qlen, which is
+ * decremented by this function when fastopen_rsk is no longer needed.
+ *
+ * Note that another solution was to simply use the existing socket lock
+ * from the listener. But first socket lock is difficult to use. It is not
+ * a simple spin lock - one must consider sock_owned_by_user() and arrange
+ * to use sk_add_backlog() stuff. But what really makes it infeasible is the
+ * locking hierarchy violation. E.g., inet_csk_listen_stop() may try to
+ * acquire a child's lock while holding listener's socket lock. A corner
+ * case might also exist in tcp_v4_hnd_req() that will trigger this locking
+ * order.
+ *
+ * When a TFO req is created, it needs to sock_hold its listener to prevent
+ * the latter data structure from going away.
+ *
+ * This function also sets "treq->listener" to NULL and unreference listener
+ * socket. treq->listener is used by the listener so it is protected by the
+ * fastopenq->lock in this function.
+ */
+void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
+			   bool reset)
+{
+	struct sock *lsk = tcp_rsk(req)->listener;
+	struct fastopen_queue *fastopenq =
+	    inet_csk(lsk)->icsk_accept_queue.fastopenq;
+
+	BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk));
+
+	tcp_sk(sk)->fastopen_rsk = NULL;
+	spin_lock_bh(&fastopenq->lock);
+	fastopenq->qlen--;
+	tcp_rsk(req)->listener = NULL;
+	if (req->sk)	/* the child socket hasn't been accepted yet */
+		goto out;
+
+	if (!reset || lsk->sk_state != TCP_LISTEN) {
+		/* If the listener has been closed don't bother with the
+		 * special RST handling below.
+		 */
+		spin_unlock_bh(&fastopenq->lock);
+		sock_put(lsk);
+		reqsk_free(req);
+		return;
+	}
+	/* Wait for 60secs before removing a req that has triggered RST.
+	 * This is a simple defense against TFO spoofing attack - by
+	 * counting the req against fastopen.max_qlen, and disabling
+	 * TFO when the qlen exceeds max_qlen.
+	 *
+	 * For more details see CoNext'11 "TCP Fast Open" paper.
+	 */
+	req->expires = jiffies + 60*HZ;
+	if (fastopenq->rskq_rst_head == NULL)
+		fastopenq->rskq_rst_head = req;
+	else
+		fastopenq->rskq_rst_tail->dl_next = req;
+
+	req->dl_next = NULL;
+	fastopenq->rskq_rst_tail = req;
+	fastopenq->qlen++;
+out:
+	spin_unlock_bh(&fastopenq->lock);
+	sock_put(lsk);
+	return;
+}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -149,6 +149,11 @@ void inet_sock_destruct(struct sock *sk)
 		pr_err("Attempt to release alive inet socket %p\n", sk);
 		return;
 	}
+	if (sk->sk_type == SOCK_STREAM) {
+		struct fastopen_queue *fastopenq =
+			inet_csk(sk)->icsk_accept_queue.fastopenq;
+		kfree(fastopenq);
+	}
 
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
@@ -212,6 +217,26 @@ int inet_listen(struct socket *sock, int backlog)
 	 * we can only allow the backlog to be adjusted.
 	 */
 	if (old_state != TCP_LISTEN) {
+		/* Check special setups for testing purpose to enable TFO w/o
+		 * requiring TCP_FASTOPEN sockopt.
+		 * Note that only TCP sockets (SOCK_STREAM) will reach here.
+		 * Also fastopenq may already been allocated because this
+		 * socket was in TCP_LISTEN state previously but was
+		 * shutdown() (rather than close()).
+		 */
+		if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 &&
+		    inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) {
+			if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0)
+				err = fastopen_init_queue(sk, backlog);
+			else if ((sysctl_tcp_fastopen &
+				  TFO_SERVER_WO_SOCKOPT2) != 0)
+				err = fastopen_init_queue(sk,
+				    ((uint)sysctl_tcp_fastopen) >> 16);
+			else
+				err = 0;
+			if (err)
+				goto out;
+		}
 		err = inet_csk_listen_start(sk, backlog);
 		if (err)
 			goto out;
@@ -701,7 +726,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags)
 
 	sock_rps_record_flow(sk2);
 	WARN_ON(!((1 << sk2->sk_state) &
-		  (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)));
+		  (TCPF_ESTABLISHED | TCPF_SYN_RECV |
+		  TCPF_CLOSE_WAIT | TCPF_CLOSE)));
 
 	sock_graft(sk2, newsock);
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -283,7 +283,9 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
 struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
 	struct sock *newsk;
+	struct request_sock *req;
 	int error;
 
 	lock_sock(sk);
@@ -296,7 +298,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 		goto out_err;
 
 	/* Find already established connection */
-	if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
+	if (reqsk_queue_empty(queue)) {
 		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
 
 		/* If this is a non blocking socket don't sleep */
@@ -308,14 +310,32 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 		if (error)
 			goto out_err;
 	}
+	req = reqsk_queue_remove(queue);
+	newsk = req->sk;
 
-	newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
-	WARN_ON(newsk->sk_state == TCP_SYN_RECV);
+	sk_acceptq_removed(sk);
+	if (sk->sk_type == SOCK_STREAM && queue->fastopenq != NULL) {
+		spin_lock_bh(&queue->fastopenq->lock);
+		if (tcp_rsk(req)->listener) {
+			/* We are still waiting for the final ACK from 3WHS
+			 * so can't free req now. Instead, we set req->sk to
+			 * NULL to signify that the child socket is taken
+			 * so reqsk_fastopen_remove() will free the req
+			 * when 3WHS finishes (or is aborted).
+			 */
+			req->sk = NULL;
+			req = NULL;
+		}
+		spin_unlock_bh(&queue->fastopenq->lock);
+	}
 out:
 	release_sock(sk);
+	if (req)
+		__reqsk_free(req);
 	return newsk;
 out_err:
 	newsk = NULL;
+	req = NULL;
 	*err = error;
 	goto out;
 }
@@ -720,13 +740,14 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start);
 void inet_csk_listen_stop(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
 	struct request_sock *acc_req;
 	struct request_sock *req;
 
 	inet_csk_delete_keepalive_timer(sk);
 
 	/* make all the listen_opt local to us */
-	acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
+	acc_req = reqsk_queue_yank_acceptq(queue);
 
 	/* Following specs, it would be better either to send FIN
 	 * (and enter FIN-WAIT-1, it is normal close)
@@ -736,7 +757,7 @@ void inet_csk_listen_stop(struct sock *sk)
 	 * To be honest, we are not able to make either
 	 * of the variants now.			--ANK
 	 */
-	reqsk_queue_destroy(&icsk->icsk_accept_queue);
+	reqsk_queue_destroy(queue);
 
 	while ((req = acc_req) != NULL) {
 		struct sock *child = req->sk;
@@ -754,6 +775,19 @@ void inet_csk_listen_stop(struct sock *sk)
 
 		percpu_counter_inc(sk->sk_prot->orphan_count);
 
+		if (sk->sk_type == SOCK_STREAM && tcp_rsk(req)->listener) {
+			BUG_ON(tcp_sk(child)->fastopen_rsk != req);
+			BUG_ON(sk != tcp_rsk(req)->listener);
+
+			/* Paranoid, to prevent race condition if
+			 * an inbound pkt destined for child is
+			 * blocked by sock lock in tcp_v4_rcv().
+			 * Also to satisfy an assertion in
+			 * tcp_v4_destroy_sock().
+			 */
+			tcp_sk(child)->fastopen_rsk = NULL;
+			sock_put(sk);
+		}
 		inet_csk_destroy_sock(child);
 
 		bh_unlock_sock(child);
@@ -763,6 +797,17 @@ void inet_csk_listen_stop(struct sock *sk)
 		sk_acceptq_removed(sk);
 		__reqsk_free(req);
 	}
+	if (queue->fastopenq != NULL) {
+		/* Free all the reqs queued in rskq_rst_head. */
+		spin_lock_bh(&queue->fastopenq->lock);
+		acc_req = queue->fastopenq->rskq_rst_head;
+		queue->fastopenq->rskq_rst_head = NULL;
+		spin_unlock_bh(&queue->fastopenq->lock);
+		while ((req = acc_req) != NULL) {
+			acc_req = req->dl_next;
+			__reqsk_free(req);
+		}
+	}
 	WARN_ON(sk->sk_ack_backlog);
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -319,6 +319,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
 	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 	treq->snt_synack	= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
+	treq->listener		= NULL;
 
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -486,8 +486,9 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLIN | POLLRDNORM | POLLRDHUP;
 
-	/* Connected? */
-	if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+	/* Connected or passive Fast Open socket? */
+	if (sk->sk_state != TCP_SYN_SENT &&
+	    (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk != NULL)) {
 		int target = sock_rcvlowat(sk, 0, INT_MAX);
 
 		if (tp->urg_seq == tp->copied_seq &&
@@ -840,10 +841,15 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 	ssize_t copied;
 	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 
-	/* Wait for a connection to finish. */
-	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+	/* Wait for a connection to finish. One exception is TCP Fast Open
+	 * (passive side) where data is allowed to be sent before a connection
+	 * is fully established.
+	 */
+	if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
+	    !tcp_passive_fastopen(sk)) {
 		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
 			goto out_err;
+	}
 
 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
@@ -1042,10 +1048,15 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 
-	/* Wait for a connection to finish. */
-	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+	/* Wait for a connection to finish. One exception is TCP Fast Open
+	 * (passive side) where data is allowed to be sent before a connection
+	 * is fully established.
+	 */
+	if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
+	    !tcp_passive_fastopen(sk)) {
 		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
 			goto do_error;
+	}
 
 	if (unlikely(tp->repair)) {
 		if (tp->repair_queue == TCP_RECV_QUEUE) {
@@ -2144,6 +2155,10 @@ void tcp_close(struct sock *sk, long timeout)
 		 * they look as CLOSING or LAST_ACK for Linux)
 		 * Probably, I missed some more holelets.
 		 * 						--ANK
+		 * XXX (TFO) - To start off we don't support SYN+ACK+FIN
+		 * in a single packet! (May consider it later but will
+		 * probably need API support or TCP_CORK SYN-ACK until
+		 * data is written and socket is closed.)
 		 */
 		tcp_send_fin(sk);
 	}
@@ -2215,8 +2230,16 @@ void tcp_close(struct sock *sk, long timeout)
 		}
 	}
 
-	if (sk->sk_state == TCP_CLOSE)
+	if (sk->sk_state == TCP_CLOSE) {
+		struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+		/* We could get here with a non-NULL req if the socket is
+		 * aborted (e.g., closed with unread data) before 3WHS
+		 * finishes.
+		 */
+		if (req != NULL)
+			reqsk_fastopen_remove(sk, req, false);
 		inet_csk_destroy_sock(sk);
+	}
 	/* Otherwise, socket is reprieved until protocol close. */
 
 out:
@@ -2688,6 +2711,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		else
 			icsk->icsk_user_timeout = msecs_to_jiffies(val);
 		break;
+
+	case TCP_FASTOPEN:
+		if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
+		    TCPF_LISTEN)))
+			err = fastopen_init_queue(sk, val);
+		else
+			err = -EINVAL;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -3501,11 +3532,15 @@ EXPORT_SYMBOL(tcp_cookie_generator);
 
 void tcp_done(struct sock *sk)
 {
+	struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+
 	if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
 
 	tcp_set_state(sk, TCP_CLOSE);
 	tcp_clear_xmit_timers(sk);
+	if (req != NULL)
+		reqsk_fastopen_remove(sk, req, false);
 
 	sk->sk_shutdown = SHUTDOWN_MASK;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -839,7 +839,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		return -1;
 
-	skb = tcp_make_synack(sk, dst, req, rvp);
+	skb = tcp_make_synack(sk, dst, req, rvp, NULL);
 
 	if (skb) {
 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
@@ -1554,7 +1554,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
 						       iph->saddr, iph->daddr);
 	if (req)
-		return tcp_check_req(sk, skb, req, prev);
+		return tcp_check_req(sk, skb, req, prev, false);
 
 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
 			th->source, iph->daddr, th->dest, inet_iif(skb));
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -507,6 +507,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 			newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
 		newtp->rx_opt.mss_clamp = req->mss;
 		TCP_ECN_openreq_child(newtp, req);
+		newtp->fastopen_rsk = NULL;
 
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
 	}
@@ -515,13 +516,18 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 EXPORT_SYMBOL(tcp_create_openreq_child);
 
 /*
- *	Process an incoming packet for SYN_RECV sockets represented
- *	as a request_sock.
+ * Process an incoming packet for SYN_RECV sockets represented as a
+ * request_sock. Normally sk is the listener socket but for TFO it
+ * points to the child socket.
+ *
+ * XXX (TFO) - The current impl contains a special check for ack
+ * validation and inside tcp_v4_reqsk_send_ack(). Can we do better?
  */
 
 struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			   struct request_sock *req,
-			   struct request_sock **prev)
+			   struct request_sock **prev,
+			   bool fastopen)
 {
 	struct tcp_options_received tmp_opt;
 	const u8 *hash_location;
@@ -530,6 +536,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
 	bool paws_reject = false;
 
+	BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN));
+
 	tmp_opt.saw_tstamp = 0;
 	if (th->doff > (sizeof(struct tcphdr)>>2)) {
 		tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
@@ -565,6 +573,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 		 *
 		 * Enforce "SYN-ACK" according to figure 8, figure 6
 		 * of RFC793, fixed by RFC1122.
+		 *
+		 * Note that even if there is new data in the SYN packet
+		 * they will be thrown away too.
 		 */
 		req->rsk_ops->rtx_syn_ack(sk, req, NULL);
 		return NULL;
@@ -622,9 +633,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	 *                  sent (the segment carries an unacceptable ACK) ...
 	 *                  a reset is sent."
 	 *
-	 * Invalid ACK: reset will be sent by listening socket
+	 * Invalid ACK: reset will be sent by listening socket.
+	 * Note that the ACK validity check for a Fast Open socket is done
+	 * elsewhere and is checked directly against the child socket rather
+	 * than req because user data may have been sent out.
 	 */
-	if ((flg & TCP_FLAG_ACK) &&
+	if ((flg & TCP_FLAG_ACK) && !fastopen &&
 	    (TCP_SKB_CB(skb)->ack_seq !=
 	     tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk))))
 		return sk;
@@ -637,7 +651,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	/* RFC793: "first check sequence number". */
 
 	if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
-					  tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
+					  tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rcv_wnd)) {
 		/* Out of window: send ACK and drop. */
 		if (!(flg & TCP_FLAG_RST))
 			req->rsk_ops->send_ack(sk, skb, req);
@@ -648,7 +662,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 
 	/* In sequence, PAWS is OK. */
 
-	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
+	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
 		req->ts_recent = tmp_opt.rcv_tsval;
 
 	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
@@ -667,10 +681,19 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 
 	/* ACK sequence verified above, just make sure ACK is
 	 * set.  If ACK not set, just silently drop the packet.
+	 *
+	 * XXX (TFO) - if we ever allow "data after SYN", the
+	 * following check needs to be removed.
 	 */
 	if (!(flg & TCP_FLAG_ACK))
 		return NULL;
 
+	/* For Fast Open no more processing is needed (sk is the
+	 * child socket).
+	 */
+	if (fastopen)
+		return sk;
+
 	/* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
 	if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
 	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
@@ -706,11 +729,21 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	}
 
 embryonic_reset:
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
-	if (!(flg & TCP_FLAG_RST))
+	if (!(flg & TCP_FLAG_RST)) {
+		/* Received a bad SYN pkt - for TFO We try not to reset
+		 * the local connection unless it's really necessary to
+		 * avoid becoming vulnerable to outside attack aiming at
+		 * resetting legit local connections.
+		 */
 		req->rsk_ops->send_reset(sk, skb);
-
-	inet_csk_reqsk_queue_drop(sk, req, prev);
+	} else if (fastopen) { /* received a valid RST pkt */
+		reqsk_fastopen_remove(sk, req, true);
+		tcp_reset(sk);
+	}
+	if (!fastopen) {
+		inet_csk_reqsk_queue_drop(sk, req, prev);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
+	}
 	return NULL;
 }
 EXPORT_SYMBOL(tcp_check_req);
@@ -719,6 +752,12 @@ EXPORT_SYMBOL(tcp_check_req);
  * Queue segment on the new socket if the new socket is active,
  * otherwise we just shortcircuit this and continue with
  * the new socket.
+ *
+ * For the vast majority of cases child->sk_state will be TCP_SYN_RECV
+ * when entering. But other states are possible due to a race condition
+ * where after __inet_lookup_established() fails but before the listener
+ * locked is obtained, other packets cause the same connection to
+ * be created.
  */
 
 int tcp_child_process(struct sock *parent, struct sock *child,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -702,7 +702,8 @@ static unsigned int tcp_synack_options(struct sock *sk,
 				   unsigned int mss, struct sk_buff *skb,
 				   struct tcp_out_options *opts,
 				   struct tcp_md5sig_key **md5,
-				   struct tcp_extend_values *xvp)
+				   struct tcp_extend_values *xvp,
+				   struct tcp_fastopen_cookie *foc)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	unsigned int remaining = MAX_TCP_OPTION_SPACE;
@@ -747,7 +748,15 @@ static unsigned int tcp_synack_options(struct sock *sk,
 		if (unlikely(!ireq->tstamp_ok))
 			remaining -= TCPOLEN_SACKPERM_ALIGNED;
 	}
+	if (foc != NULL) {
+		u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
+		need = (need + 3) & ~3U;  /* Align to 32 bits */
+		if (remaining >= need) {
+			opts->options |= OPTION_FAST_OPEN_COOKIE;
+			opts->fastopen_cookie = foc;
+			remaining -= need;
+		}
+	}
 	/* Similar rationale to tcp_syn_options() applies here, too.
 	 * If the <SYN> options fit, the same options should fit now!
 	 */
@@ -2658,7 +2667,8 @@ int tcp_send_synack(struct sock *sk)
  */
 struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req,
-				struct request_values *rvp)
+				struct request_values *rvp,
+				struct tcp_fastopen_cookie *foc)
 {
 	struct tcp_out_options opts;
 	struct tcp_extend_values *xvp = tcp_xv(rvp);
@@ -2718,7 +2728,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 #endif
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	tcp_header_size = tcp_synack_options(sk, req, mss,
-					     skb, &opts, &md5, xvp)
+					     skb, &opts, &md5, xvp, foc)
 			+ sizeof(*th);
 
 	skb_push(skb, tcp_header_size);
@@ -2772,7 +2782,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	}
 
 	th->seq = htonl(TCP_SKB_CB(skb)->seq);
-	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
+	/* XXX data is queued and acked as is. No buffer/window check */
+	th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
 
 	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
 	th->window = htons(min(req->rcv_wnd, 65535U));
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -304,6 +304,35 @@ static void tcp_probe_timer(struct sock *sk)
 	}
 }
 
+/*
+ *	Timer for Fast Open socket to retransmit SYNACK. Note that the
+ *	sk here is the child socket, not the parent (listener) socket.
+ */
+static void tcp_fastopen_synack_timer(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int max_retries = icsk->icsk_syn_retries ? :
+	    sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
+	struct request_sock *req;
+
+	req = tcp_sk(sk)->fastopen_rsk;
+	req->rsk_ops->syn_ack_timeout(sk, req);
+
+	if (req->retrans >= max_retries) {
+		tcp_write_err(sk);
+		return;
+	}
+	/* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
+	 * returned from rtx_syn_ack() to make it more persistent like
+	 * regular retransmit because if the child socket has been accepted
+	 * it's not good to give up too easily.
+	 */
+	req->rsk_ops->rtx_syn_ack(sk, req, NULL);
+	req->retrans++;
+	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+			  TCP_TIMEOUT_INIT << req->retrans, TCP_RTO_MAX);
+}
+
 /*
  *	The TCP retransmit timer.
  */
@@ -317,7 +346,15 @@ void tcp_retransmit_timer(struct sock *sk)
 		tcp_resume_early_retransmit(sk);
 		return;
 	}
+	if (tp->fastopen_rsk) {
+		BUG_ON(sk->sk_state != TCP_SYN_RECV &&
+		    sk->sk_state != TCP_FIN_WAIT1);
+		tcp_fastopen_synack_timer(sk);
+		/* Before we receive ACK to our SYN-ACK don't retransmit
+		 * anything else (e.g., data or FIN segments).
+		 */
+		return;
+	}
 	if (!tp->packets_out)
 		goto out;
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -190,6 +190,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq6 = inet6_rsk(req);
 	treq = tcp_rsk(req);
+	treq->listener = NULL;
 
 	if (security_inet_conn_request(sk, skb, req))
 		goto out_free;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -475,7 +475,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
 		goto done;
 
-	skb = tcp_make_synack(sk, dst, req, rvp);
+	skb = tcp_make_synack(sk, dst, req, rvp, NULL);
 
 	if (skb) {
 		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
@@ -987,7 +987,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 				   &ipv6_hdr(skb)->saddr,
 				   &ipv6_hdr(skb)->daddr, inet6_iif(skb));
 	if (req)
-		return tcp_check_req(sk, skb, req, prev);
+		return tcp_check_req(sk, skb, req, prev, false);
 
 	nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
 			&ipv6_hdr(skb)->saddr, th->source,
@@ -1179,6 +1179,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	    want_cookie)
 		goto drop_and_free;
 
+	tcp_rsk(req)->listener = NULL;
 	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 	return 0;
 
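
For completeness, the client half that exercises such a listener is not in this patch: it uses sendto() with MSG_FASTOPEN, added by the companion client-side TFO patches in this series, which performs the handshake itself and, once a valid TFO cookie is cached, carries the payload in the SYN. A hedged sketch, with the address and port assumed to match the server example above:

/* Illustrative TFO client (userspace, not part of this commit). */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN	0x20000000	/* send data in the SYN */
#endif

int main(void)
{
	struct sockaddr_in addr;
	const char msg[] = "hello in the SYN";
	char buf[1024];
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(8080);		/* matches the server sketch */
	inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);

	/* No explicit connect(): MSG_FASTOPEN performs the handshake and,
	 * once a TFO cookie for this server is cached, carries the payload
	 * in the SYN itself. */
	sendto(fd, msg, sizeof(msg) - 1, MSG_FASTOPEN,
	       (struct sockaddr *)&addr, sizeof(addr));
	read(fd, buf, sizeof(buf));	/* echo from the server */
	close(fd);
	return 0;
}

On the first connection the kernel falls back to a regular handshake while requesting a cookie via TCP's TFO cookie option (item 7 above); only subsequent connections carry data in the SYN.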