forked from mirrors/linux
		
	mptcp: Handle MP_CAPABLE options for outgoing connections
Add hooks to tcp_output.c to add MP_CAPABLE to an outgoing SYN request, to capture the MP_CAPABLE in the received SYN-ACK, and to add MP_CAPABLE to the final ACK of the three-way handshake.

Use the .sk_rx_dst_set() handler in the subflow proto to capture when the responding SYN-ACK is received and notify the MPTCP connection layer.

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Peter Krystad <peter.krystad@linux.intel.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									2303f994b3
								
							
						
					
					
						commit
						cec37a6e41
					
				
					 9 changed files with 663 additions and 24 deletions
				
			
		|  | @ -137,6 +137,9 @@ struct tcp_request_sock { | ||||||
| 	const struct tcp_request_sock_ops *af_specific; | 	const struct tcp_request_sock_ops *af_specific; | ||||||
| 	u64				snt_synack; /* first SYNACK sent time */ | 	u64				snt_synack; /* first SYNACK sent time */ | ||||||
| 	bool				tfo_listener; | 	bool				tfo_listener; | ||||||
|  | #if IS_ENABLED(CONFIG_MPTCP) | ||||||
|  | 	bool				is_mptcp; | ||||||
|  | #endif | ||||||
| 	u32				txhash; | 	u32				txhash; | ||||||
| 	u32				rcv_isn; | 	u32				rcv_isn; | ||||||
| 	u32				snt_isn; | 	u32				snt_isn; | ||||||
|  |  | ||||||
|  | @ -39,8 +39,27 @@ struct mptcp_out_options { | ||||||
| 
 | 
 | ||||||
| void mptcp_init(void); | void mptcp_init(void); | ||||||
| 
 | 
 | ||||||
|  | static inline bool sk_is_mptcp(const struct sock *sk) | ||||||
|  | { | ||||||
|  | 	return tcp_sk(sk)->is_mptcp; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline bool rsk_is_mptcp(const struct request_sock *req) | ||||||
|  | { | ||||||
|  | 	return tcp_rsk(req)->is_mptcp; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void mptcp_parse_option(const unsigned char *ptr, int opsize, | void mptcp_parse_option(const unsigned char *ptr, int opsize, | ||||||
| 			struct tcp_options_received *opt_rx); | 			struct tcp_options_received *opt_rx); | ||||||
|  | bool mptcp_syn_options(struct sock *sk, unsigned int *size, | ||||||
|  | 		       struct mptcp_out_options *opts); | ||||||
|  | void mptcp_rcv_synsent(struct sock *sk); | ||||||
|  | bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, | ||||||
|  | 			  struct mptcp_out_options *opts); | ||||||
|  | bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, | ||||||
|  | 			       unsigned int *size, unsigned int remaining, | ||||||
|  | 			       struct mptcp_out_options *opts); | ||||||
|  | 
 | ||||||
| void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts); | void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts); | ||||||
| 
 | 
 | ||||||
| /* move the skb extension ownership, with the assumption that 'to' is
 | /* move the skb extension ownership, with the assumption that 'to' is
 | ||||||
|  | @ -89,11 +108,47 @@ static inline void mptcp_init(void) | ||||||
| { | { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static inline bool sk_is_mptcp(const struct sock *sk) | ||||||
|  | { | ||||||
|  | 	return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline bool rsk_is_mptcp(const struct request_sock *req) | ||||||
|  | { | ||||||
|  | 	return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static inline void mptcp_parse_option(const unsigned char *ptr, int opsize, | static inline void mptcp_parse_option(const unsigned char *ptr, int opsize, | ||||||
| 				      struct tcp_options_received *opt_rx) | 				      struct tcp_options_received *opt_rx) | ||||||
| { | { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static inline bool mptcp_syn_options(struct sock *sk, unsigned int *size, | ||||||
|  | 				     struct mptcp_out_options *opts) | ||||||
|  | { | ||||||
|  | 	return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline void mptcp_rcv_synsent(struct sock *sk) | ||||||
|  | { | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline bool mptcp_synack_options(const struct request_sock *req, | ||||||
|  | 					unsigned int *size, | ||||||
|  | 					struct mptcp_out_options *opts) | ||||||
|  | { | ||||||
|  | 	return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline bool mptcp_established_options(struct sock *sk, | ||||||
|  | 					     struct sk_buff *skb, | ||||||
|  | 					     unsigned int *size, | ||||||
|  | 					     unsigned int remaining, | ||||||
|  | 					     struct mptcp_out_options *opts) | ||||||
|  | { | ||||||
|  | 	return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static inline void mptcp_skb_ext_move(struct sk_buff *to, | static inline void mptcp_skb_ext_move(struct sk_buff *to, | ||||||
| 				      const struct sk_buff *from) | 				      const struct sk_buff *from) | ||||||
| { | { | ||||||
|  | @ -107,6 +162,8 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, | ||||||
| 
 | 
 | ||||||
| #endif /* CONFIG_MPTCP */ | #endif /* CONFIG_MPTCP */ | ||||||
| 
 | 
 | ||||||
|  | void mptcp_handle_ipv6_mapped(struct sock *sk, bool mapped); | ||||||
|  | 
 | ||||||
| #if IS_ENABLED(CONFIG_MPTCP_IPV6) | #if IS_ENABLED(CONFIG_MPTCP_IPV6) | ||||||
| int mptcpv6_init(void); | int mptcpv6_init(void); | ||||||
| #elif IS_ENABLED(CONFIG_IPV6) | #elif IS_ENABLED(CONFIG_IPV6) | ||||||
|  |  | ||||||
|  | @ -5978,6 +5978,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | ||||||
| 		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); | 		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); | ||||||
| 		tcp_initialize_rcv_mss(sk); | 		tcp_initialize_rcv_mss(sk); | ||||||
| 
 | 
 | ||||||
|  | 		if (sk_is_mptcp(sk)) | ||||||
|  | 			mptcp_rcv_synsent(sk); | ||||||
|  | 
 | ||||||
| 		/* Remember, tcp_poll() does not lock socket!
 | 		/* Remember, tcp_poll() does not lock socket!
 | ||||||
| 		 * Change state from SYN-SENT only after copied_seq | 		 * Change state from SYN-SENT only after copied_seq | ||||||
| 		 * is initialized. */ | 		 * is initialized. */ | ||||||
|  | @ -6600,6 +6603,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, | ||||||
| 
 | 
 | ||||||
| 	tcp_rsk(req)->af_specific = af_ops; | 	tcp_rsk(req)->af_specific = af_ops; | ||||||
| 	tcp_rsk(req)->ts_off = 0; | 	tcp_rsk(req)->ts_off = 0; | ||||||
|  | #if IS_ENABLED(CONFIG_MPTCP) | ||||||
|  | 	tcp_rsk(req)->is_mptcp = 0; | ||||||
|  | #endif | ||||||
| 
 | 
 | ||||||
| 	tcp_clear_options(&tmp_opt); | 	tcp_clear_options(&tmp_opt); | ||||||
| 	tmp_opt.mss_clamp = af_ops->mss_clamp; | 	tmp_opt.mss_clamp = af_ops->mss_clamp; | ||||||
|  |  | ||||||
|  | @ -597,6 +597,22 @@ static void smc_set_option_cond(const struct tcp_sock *tp, | ||||||
| #endif | #endif | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static void mptcp_set_option_cond(const struct request_sock *req, | ||||||
|  | 				  struct tcp_out_options *opts, | ||||||
|  | 				  unsigned int *remaining) | ||||||
|  | { | ||||||
|  | 	if (rsk_is_mptcp(req)) { | ||||||
|  | 		unsigned int size; | ||||||
|  | 
 | ||||||
|  | 		if (mptcp_synack_options(req, &size, &opts->mptcp)) { | ||||||
|  | 			if (*remaining >= size) { | ||||||
|  | 				opts->options |= OPTION_MPTCP; | ||||||
|  | 				*remaining -= size; | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /* Compute TCP options for SYN packets. This is not the final
 | /* Compute TCP options for SYN packets. This is not the final
 | ||||||
|  * network wire format yet. |  * network wire format yet. | ||||||
|  */ |  */ | ||||||
|  | @ -666,6 +682,15 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, | ||||||
| 
 | 
 | ||||||
| 	smc_set_option(tp, opts, &remaining); | 	smc_set_option(tp, opts, &remaining); | ||||||
| 
 | 
 | ||||||
|  | 	if (sk_is_mptcp(sk)) { | ||||||
|  | 		unsigned int size; | ||||||
|  | 
 | ||||||
|  | 		if (mptcp_syn_options(sk, &size, &opts->mptcp)) { | ||||||
|  | 			opts->options |= OPTION_MPTCP; | ||||||
|  | 			remaining -= size; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	return MAX_TCP_OPTION_SPACE - remaining; | 	return MAX_TCP_OPTION_SPACE - remaining; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -727,6 +752,8 @@ static unsigned int tcp_synack_options(const struct sock *sk, | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	mptcp_set_option_cond(req, opts, &remaining); | ||||||
|  | 
 | ||||||
| 	smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining); | 	smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining); | ||||||
| 
 | 
 | ||||||
| 	return MAX_TCP_OPTION_SPACE - remaining; | 	return MAX_TCP_OPTION_SPACE - remaining; | ||||||
|  | @ -764,6 +791,23 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb | ||||||
| 		size += TCPOLEN_TSTAMP_ALIGNED; | 		size += TCPOLEN_TSTAMP_ALIGNED; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	/* MPTCP options have precedence over SACK for the limited TCP
 | ||||||
|  | 	 * option space because a MPTCP connection would be forced to | ||||||
|  | 	 * fall back to regular TCP if a required multipath option is | ||||||
|  | 	 * missing. SACK still gets a chance to use whatever space is | ||||||
|  | 	 * left. | ||||||
|  | 	 */ | ||||||
|  | 	if (sk_is_mptcp(sk)) { | ||||||
|  | 		unsigned int remaining = MAX_TCP_OPTION_SPACE - size; | ||||||
|  | 		unsigned int opt_size = 0; | ||||||
|  | 
 | ||||||
|  | 		if (mptcp_established_options(sk, skb, &opt_size, remaining, | ||||||
|  | 					      &opts->mptcp)) { | ||||||
|  | 			opts->options |= OPTION_MPTCP; | ||||||
|  | 			size += opt_size; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; | 	eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; | ||||||
| 	if (unlikely(eff_sacks)) { | 	if (unlikely(eff_sacks)) { | ||||||
| 		const unsigned int remaining = MAX_TCP_OPTION_SPACE - size; | 		const unsigned int remaining = MAX_TCP_OPTION_SPACE - size; | ||||||
|  |  | ||||||
|  | @ -238,6 +238,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | ||||||
| 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; | 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; | ||||||
| 
 | 
 | ||||||
| 		icsk->icsk_af_ops = &ipv6_mapped; | 		icsk->icsk_af_ops = &ipv6_mapped; | ||||||
|  | 		if (sk_is_mptcp(sk)) | ||||||
|  | 			mptcp_handle_ipv6_mapped(sk, true); | ||||||
| 		sk->sk_backlog_rcv = tcp_v4_do_rcv; | 		sk->sk_backlog_rcv = tcp_v4_do_rcv; | ||||||
| #ifdef CONFIG_TCP_MD5SIG | #ifdef CONFIG_TCP_MD5SIG | ||||||
| 		tp->af_specific = &tcp_sock_ipv6_mapped_specific; | 		tp->af_specific = &tcp_sock_ipv6_mapped_specific; | ||||||
|  | @ -248,6 +250,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | ||||||
| 		if (err) { | 		if (err) { | ||||||
| 			icsk->icsk_ext_hdr_len = exthdrlen; | 			icsk->icsk_ext_hdr_len = exthdrlen; | ||||||
| 			icsk->icsk_af_ops = &ipv6_specific; | 			icsk->icsk_af_ops = &ipv6_specific; | ||||||
|  | 			if (sk_is_mptcp(sk)) | ||||||
|  | 				mptcp_handle_ipv6_mapped(sk, false); | ||||||
| 			sk->sk_backlog_rcv = tcp_v6_do_rcv; | 			sk->sk_backlog_rcv = tcp_v6_do_rcv; | ||||||
| #ifdef CONFIG_TCP_MD5SIG | #ifdef CONFIG_TCP_MD5SIG | ||||||
| 			tp->af_specific = &tcp_sock_ipv6_specific; | 			tp->af_specific = &tcp_sock_ipv6_specific; | ||||||
|  | @ -1203,6 +1207,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * | ||||||
| 		newnp->saddr = newsk->sk_v6_rcv_saddr; | 		newnp->saddr = newsk->sk_v6_rcv_saddr; | ||||||
| 
 | 
 | ||||||
| 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; | 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; | ||||||
|  | 		if (sk_is_mptcp(newsk)) | ||||||
|  | 			mptcp_handle_ipv6_mapped(newsk, true); | ||||||
| 		newsk->sk_backlog_rcv = tcp_v4_do_rcv; | 		newsk->sk_backlog_rcv = tcp_v4_do_rcv; | ||||||
| #ifdef CONFIG_TCP_MD5SIG | #ifdef CONFIG_TCP_MD5SIG | ||||||
| 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific; | 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific; | ||||||
|  |  | ||||||
|  | @ -72,14 +72,114 @@ void mptcp_parse_option(const unsigned char *ptr, int opsize, | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void mptcp_get_options(const struct sk_buff *skb, | ||||||
|  | 		       struct tcp_options_received *opt_rx) | ||||||
|  | { | ||||||
|  | 	const unsigned char *ptr; | ||||||
|  | 	const struct tcphdr *th = tcp_hdr(skb); | ||||||
|  | 	int length = (th->doff * 4) - sizeof(struct tcphdr); | ||||||
|  | 
 | ||||||
|  | 	ptr = (const unsigned char *)(th + 1); | ||||||
|  | 
 | ||||||
|  | 	while (length > 0) { | ||||||
|  | 		int opcode = *ptr++; | ||||||
|  | 		int opsize; | ||||||
|  | 
 | ||||||
|  | 		switch (opcode) { | ||||||
|  | 		case TCPOPT_EOL: | ||||||
|  | 			return; | ||||||
|  | 		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */ | ||||||
|  | 			length--; | ||||||
|  | 			continue; | ||||||
|  | 		default: | ||||||
|  | 			opsize = *ptr++; | ||||||
|  | 			if (opsize < 2) /* "silly options" */ | ||||||
|  | 				return; | ||||||
|  | 			if (opsize > length) | ||||||
|  | 				return;	/* don't parse partial options */ | ||||||
|  | 			if (opcode == TCPOPT_MPTCP) | ||||||
|  | 				mptcp_parse_option(ptr, opsize, opt_rx); | ||||||
|  | 			ptr += opsize - 2; | ||||||
|  | 			length -= opsize; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool mptcp_syn_options(struct sock *sk, unsigned int *size, | ||||||
|  | 		       struct mptcp_out_options *opts) | ||||||
|  | { | ||||||
|  | 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); | ||||||
|  | 
 | ||||||
|  | 	if (subflow->request_mptcp) { | ||||||
|  | 		pr_debug("local_key=%llu", subflow->local_key); | ||||||
|  | 		opts->suboptions = OPTION_MPTCP_MPC_SYN; | ||||||
|  | 		opts->sndr_key = subflow->local_key; | ||||||
|  | 		*size = TCPOLEN_MPTCP_MPC_SYN; | ||||||
|  | 		return true; | ||||||
|  | 	} | ||||||
|  | 	return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void mptcp_rcv_synsent(struct sock *sk) | ||||||
|  | { | ||||||
|  | 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); | ||||||
|  | 	struct tcp_sock *tp = tcp_sk(sk); | ||||||
|  | 
 | ||||||
|  | 	pr_debug("subflow=%p", subflow); | ||||||
|  | 	if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) { | ||||||
|  | 		subflow->mp_capable = 1; | ||||||
|  | 		subflow->remote_key = tp->rx_opt.mptcp.sndr_key; | ||||||
|  | 	} else { | ||||||
|  | 		tcp_sk(sk)->is_mptcp = 0; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, | ||||||
|  | 			       unsigned int *size, unsigned int remaining, | ||||||
|  | 			       struct mptcp_out_options *opts) | ||||||
|  | { | ||||||
|  | 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); | ||||||
|  | 
 | ||||||
|  | 	if (subflow->mp_capable && !subflow->fourth_ack) { | ||||||
|  | 		opts->suboptions = OPTION_MPTCP_MPC_ACK; | ||||||
|  | 		opts->sndr_key = subflow->local_key; | ||||||
|  | 		opts->rcvr_key = subflow->remote_key; | ||||||
|  | 		*size = TCPOLEN_MPTCP_MPC_ACK; | ||||||
|  | 		subflow->fourth_ack = 1; | ||||||
|  | 		pr_debug("subflow=%p, local_key=%llu, remote_key=%llu", | ||||||
|  | 			 subflow, subflow->local_key, subflow->remote_key); | ||||||
|  | 		return true; | ||||||
|  | 	} | ||||||
|  | 	return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, | ||||||
|  | 			  struct mptcp_out_options *opts) | ||||||
|  | { | ||||||
|  | 	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); | ||||||
|  | 
 | ||||||
|  | 	if (subflow_req->mp_capable) { | ||||||
|  | 		opts->suboptions = OPTION_MPTCP_MPC_SYNACK; | ||||||
|  | 		opts->sndr_key = subflow_req->local_key; | ||||||
|  | 		*size = TCPOLEN_MPTCP_MPC_SYNACK; | ||||||
|  | 		pr_debug("subflow_req=%p, local_key=%llu", | ||||||
|  | 			 subflow_req, subflow_req->local_key); | ||||||
|  | 		return true; | ||||||
|  | 	} | ||||||
|  | 	return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) | void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) | ||||||
| { | { | ||||||
| 	if ((OPTION_MPTCP_MPC_SYN | | 	if ((OPTION_MPTCP_MPC_SYN | | ||||||
|  | 	     OPTION_MPTCP_MPC_SYNACK | | ||||||
| 	     OPTION_MPTCP_MPC_ACK) & opts->suboptions) { | 	     OPTION_MPTCP_MPC_ACK) & opts->suboptions) { | ||||||
| 		u8 len; | 		u8 len; | ||||||
| 
 | 
 | ||||||
| 		if (OPTION_MPTCP_MPC_SYN & opts->suboptions) | 		if (OPTION_MPTCP_MPC_SYN & opts->suboptions) | ||||||
| 			len = TCPOLEN_MPTCP_MPC_SYN; | 			len = TCPOLEN_MPTCP_MPC_SYN; | ||||||
|  | 		else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) | ||||||
|  | 			len = TCPOLEN_MPTCP_MPC_SYNACK; | ||||||
| 		else | 		else | ||||||
| 			len = TCPOLEN_MPTCP_MPC_ACK; | 			len = TCPOLEN_MPTCP_MPC_ACK; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -25,12 +25,28 @@ | ||||||
|  */ |  */ | ||||||
| static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk) | static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk) | ||||||
| { | { | ||||||
| 	if (!msk->subflow) | 	if (!msk->subflow || mptcp_subflow_ctx(msk->subflow->sk)->fourth_ack) | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 
 | 
 | ||||||
| 	return msk->subflow; | 	return msk->subflow; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /* If msk has a single subflow and the MP_CAPABLE handshake has failed,
 | ||||||
|  |  * return it. | ||||||
|  |  * Otherwise return NULL. | ||||||
|  |  */ | ||||||
|  | static struct socket *__mptcp_tcp_fallback(const struct mptcp_sock *msk) | ||||||
|  | { | ||||||
|  | 	struct socket *ssock = __mptcp_nmpc_socket(msk); | ||||||
|  | 
 | ||||||
|  | 	sock_owned_by_me((const struct sock *)msk); | ||||||
|  | 
 | ||||||
|  | 	if (!ssock || sk_is_mptcp(ssock->sk)) | ||||||
|  | 		return NULL; | ||||||
|  | 
 | ||||||
|  | 	return ssock; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk) | static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk) | ||||||
| { | { | ||||||
| 	return ((struct sock *)msk)->sk_state == TCP_CLOSE; | 	return ((struct sock *)msk)->sk_state == TCP_CLOSE; | ||||||
|  | @ -56,6 +72,7 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state) | ||||||
| 
 | 
 | ||||||
| 	msk->subflow = ssock; | 	msk->subflow = ssock; | ||||||
| 	subflow = mptcp_subflow_ctx(ssock->sk); | 	subflow = mptcp_subflow_ctx(ssock->sk); | ||||||
|  | 	list_add(&subflow->node, &msk->conn_list); | ||||||
| 	subflow->request_mptcp = 1; | 	subflow->request_mptcp = 1; | ||||||
| 
 | 
 | ||||||
| set_state: | set_state: | ||||||
|  | @ -64,66 +81,169 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state) | ||||||
| 	return ssock; | 	return ssock; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static struct sock *mptcp_subflow_get(const struct mptcp_sock *msk) | ||||||
|  | { | ||||||
|  | 	struct mptcp_subflow_context *subflow; | ||||||
|  | 
 | ||||||
|  | 	sock_owned_by_me((const struct sock *)msk); | ||||||
|  | 
 | ||||||
|  | 	mptcp_for_each_subflow(msk, subflow) { | ||||||
|  | 		return mptcp_subflow_tcp_sock(subflow); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return NULL; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) | static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) | ||||||
| { | { | ||||||
| 	struct mptcp_sock *msk = mptcp_sk(sk); | 	struct mptcp_sock *msk = mptcp_sk(sk); | ||||||
| 	struct socket *subflow = msk->subflow; | 	struct socket *ssock; | ||||||
|  | 	struct sock *ssk; | ||||||
|  | 	int ret; | ||||||
| 
 | 
 | ||||||
| 	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) | 	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) | ||||||
| 		return -EOPNOTSUPP; | 		return -EOPNOTSUPP; | ||||||
| 
 | 
 | ||||||
| 	return sock_sendmsg(subflow, msg); | 	lock_sock(sk); | ||||||
|  | 	ssock = __mptcp_tcp_fallback(msk); | ||||||
|  | 	if (ssock) { | ||||||
|  | 		pr_debug("fallback passthrough"); | ||||||
|  | 		ret = sock_sendmsg(ssock, msg); | ||||||
|  | 		release_sock(sk); | ||||||
|  | 		return ret; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	ssk = mptcp_subflow_get(msk); | ||||||
|  | 	if (!ssk) { | ||||||
|  | 		release_sock(sk); | ||||||
|  | 		return -ENOTCONN; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	ret = sock_sendmsg(ssk->sk_socket, msg); | ||||||
|  | 
 | ||||||
|  | 	release_sock(sk); | ||||||
|  | 	return ret; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, | static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, | ||||||
| 			 int nonblock, int flags, int *addr_len) | 			 int nonblock, int flags, int *addr_len) | ||||||
| { | { | ||||||
| 	struct mptcp_sock *msk = mptcp_sk(sk); | 	struct mptcp_sock *msk = mptcp_sk(sk); | ||||||
| 	struct socket *subflow = msk->subflow; | 	struct socket *ssock; | ||||||
|  | 	struct sock *ssk; | ||||||
|  | 	int copied = 0; | ||||||
| 
 | 
 | ||||||
| 	if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT)) | 	if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT)) | ||||||
| 		return -EOPNOTSUPP; | 		return -EOPNOTSUPP; | ||||||
| 
 | 
 | ||||||
| 	return sock_recvmsg(subflow, msg, flags); | 	lock_sock(sk); | ||||||
|  | 	ssock = __mptcp_tcp_fallback(msk); | ||||||
|  | 	if (ssock) { | ||||||
|  | 		pr_debug("fallback-read subflow=%p", | ||||||
|  | 			 mptcp_subflow_ctx(ssock->sk)); | ||||||
|  | 		copied = sock_recvmsg(ssock, msg, flags); | ||||||
|  | 		release_sock(sk); | ||||||
|  | 		return copied; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	ssk = mptcp_subflow_get(msk); | ||||||
|  | 	if (!ssk) { | ||||||
|  | 		release_sock(sk); | ||||||
|  | 		return -ENOTCONN; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	copied = sock_recvmsg(ssk->sk_socket, msg, flags); | ||||||
|  | 
 | ||||||
|  | 	release_sock(sk); | ||||||
|  | 
 | ||||||
|  | 	return copied; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* subflow sockets can be either outgoing (connect) or incoming
 | ||||||
|  |  * (accept). | ||||||
|  |  * | ||||||
|  |  * Outgoing subflows use in-kernel sockets. | ||||||
|  |  * Incoming subflows do not have their own 'struct socket' allocated, | ||||||
|  |  * so we need to use tcp_close() after detaching them from the mptcp | ||||||
|  |  * parent socket. | ||||||
|  |  */ | ||||||
|  | static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, | ||||||
|  | 			      struct mptcp_subflow_context *subflow, | ||||||
|  | 			      long timeout) | ||||||
|  | { | ||||||
|  | 	struct socket *sock = READ_ONCE(ssk->sk_socket); | ||||||
|  | 
 | ||||||
|  | 	list_del(&subflow->node); | ||||||
|  | 
 | ||||||
|  | 	if (sock && sock != sk->sk_socket) { | ||||||
|  | 		/* outgoing subflow */ | ||||||
|  | 		sock_release(sock); | ||||||
|  | 	} else { | ||||||
|  | 		/* incoming subflow */ | ||||||
|  | 		tcp_close(ssk, timeout); | ||||||
|  | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int mptcp_init_sock(struct sock *sk) | static int mptcp_init_sock(struct sock *sk) | ||||||
| { | { | ||||||
|  | 	struct mptcp_sock *msk = mptcp_sk(sk); | ||||||
|  | 
 | ||||||
|  | 	INIT_LIST_HEAD(&msk->conn_list); | ||||||
|  | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void mptcp_close(struct sock *sk, long timeout) | static void mptcp_close(struct sock *sk, long timeout) | ||||||
| { | { | ||||||
|  | 	struct mptcp_subflow_context *subflow, *tmp; | ||||||
| 	struct mptcp_sock *msk = mptcp_sk(sk); | 	struct mptcp_sock *msk = mptcp_sk(sk); | ||||||
| 	struct socket *ssock; |  | ||||||
| 
 | 
 | ||||||
| 	inet_sk_state_store(sk, TCP_CLOSE); | 	inet_sk_state_store(sk, TCP_CLOSE); | ||||||
| 
 | 
 | ||||||
| 	ssock = __mptcp_nmpc_socket(msk); | 	lock_sock(sk); | ||||||
| 	if (ssock) { | 
 | ||||||
| 		pr_debug("subflow=%p", mptcp_subflow_ctx(ssock->sk)); | 	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { | ||||||
| 		sock_release(ssock); | 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | ||||||
|  | 
 | ||||||
|  | 		__mptcp_close_ssk(sk, ssk, subflow, timeout); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	sock_orphan(sk); | 	release_sock(sk); | ||||||
| 	sock_put(sk); | 	sk_common_release(sk); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len) | static int mptcp_get_port(struct sock *sk, unsigned short snum) | ||||||
| { | { | ||||||
| 	struct mptcp_sock *msk = mptcp_sk(sk); | 	struct mptcp_sock *msk = mptcp_sk(sk); | ||||||
| 	int err; | 	struct socket *ssock; | ||||||
| 
 | 
 | ||||||
| 	saddr->sa_family = AF_INET; | 	ssock = __mptcp_nmpc_socket(msk); | ||||||
|  | 	pr_debug("msk=%p, subflow=%p", msk, ssock); | ||||||
|  | 	if (WARN_ON_ONCE(!ssock)) | ||||||
|  | 		return -EINVAL; | ||||||
| 
 | 
 | ||||||
| 	pr_debug("msk=%p, subflow=%p", msk, | 	return inet_csk_get_port(ssock->sk, snum); | ||||||
| 		 mptcp_subflow_ctx(msk->subflow->sk)); | } | ||||||
| 
 | 
 | ||||||
| 	err = kernel_connect(msk->subflow, saddr, len, 0); | void mptcp_finish_connect(struct sock *ssk) | ||||||
|  | { | ||||||
|  | 	struct mptcp_subflow_context *subflow; | ||||||
|  | 	struct mptcp_sock *msk; | ||||||
|  | 	struct sock *sk; | ||||||
| 
 | 
 | ||||||
| 	sk->sk_state = TCP_ESTABLISHED; | 	subflow = mptcp_subflow_ctx(ssk); | ||||||
| 
 | 
 | ||||||
| 	return err; | 	if (!subflow->mp_capable) | ||||||
|  | 		return; | ||||||
|  | 
 | ||||||
|  | 	sk = subflow->conn; | ||||||
|  | 	msk = mptcp_sk(sk); | ||||||
|  | 
 | ||||||
|  | 	/* the socket is not connected yet, so no msk/subflow ops can race
 | ||||||
|  | 	 * with accessing the fields below | ||||||
|  | 	 */ | ||||||
|  | 	WRITE_ONCE(msk->remote_key, subflow->remote_key); | ||||||
|  | 	WRITE_ONCE(msk->local_key, subflow->local_key); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static struct proto mptcp_prot = { | static struct proto mptcp_prot = { | ||||||
|  | @ -132,13 +252,12 @@ static struct proto mptcp_prot = { | ||||||
| 	.init		= mptcp_init_sock, | 	.init		= mptcp_init_sock, | ||||||
| 	.close		= mptcp_close, | 	.close		= mptcp_close, | ||||||
| 	.accept		= inet_csk_accept, | 	.accept		= inet_csk_accept, | ||||||
| 	.connect	= mptcp_connect, |  | ||||||
| 	.shutdown	= tcp_shutdown, | 	.shutdown	= tcp_shutdown, | ||||||
| 	.sendmsg	= mptcp_sendmsg, | 	.sendmsg	= mptcp_sendmsg, | ||||||
| 	.recvmsg	= mptcp_recvmsg, | 	.recvmsg	= mptcp_recvmsg, | ||||||
| 	.hash		= inet_hash, | 	.hash		= inet_hash, | ||||||
| 	.unhash		= inet_unhash, | 	.unhash		= inet_unhash, | ||||||
| 	.get_port	= inet_csk_get_port, | 	.get_port	= mptcp_get_port, | ||||||
| 	.obj_size	= sizeof(struct mptcp_sock), | 	.obj_size	= sizeof(struct mptcp_sock), | ||||||
| 	.no_autobind	= true, | 	.no_autobind	= true, | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -40,19 +40,47 @@ | ||||||
| struct mptcp_sock { | struct mptcp_sock { | ||||||
| 	/* inet_connection_sock must be the first member */ | 	/* inet_connection_sock must be the first member */ | ||||||
| 	struct inet_connection_sock sk; | 	struct inet_connection_sock sk; | ||||||
|  | 	u64		local_key; | ||||||
|  | 	u64		remote_key; | ||||||
|  | 	struct list_head conn_list; | ||||||
| 	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */ | 	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */ | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | #define mptcp_for_each_subflow(__msk, __subflow)			\ | ||||||
|  | 	list_for_each_entry(__subflow, &((__msk)->conn_list), node) | ||||||
|  | 
 | ||||||
| static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) | static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) | ||||||
| { | { | ||||||
| 	return (struct mptcp_sock *)sk; | 	return (struct mptcp_sock *)sk; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | struct mptcp_subflow_request_sock { | ||||||
|  | 	struct	tcp_request_sock sk; | ||||||
|  | 	u8	mp_capable : 1, | ||||||
|  | 		mp_join : 1, | ||||||
|  | 		backup : 1; | ||||||
|  | 	u64	local_key; | ||||||
|  | 	u64	remote_key; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static inline struct mptcp_subflow_request_sock * | ||||||
|  | mptcp_subflow_rsk(const struct request_sock *rsk) | ||||||
|  | { | ||||||
|  | 	return (struct mptcp_subflow_request_sock *)rsk; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /* MPTCP subflow context */ | /* MPTCP subflow context */ | ||||||
| struct mptcp_subflow_context { | struct mptcp_subflow_context { | ||||||
| 	u32	request_mptcp : 1;  /* send MP_CAPABLE */ | 	struct	list_head node;/* conn_list of subflows */ | ||||||
|  | 	u64	local_key; | ||||||
|  | 	u64	remote_key; | ||||||
|  | 	u32	request_mptcp : 1,  /* send MP_CAPABLE */ | ||||||
|  | 		mp_capable : 1,	    /* remote is MPTCP capable */ | ||||||
|  | 		fourth_ack : 1,	    /* send initial DSS */ | ||||||
|  | 		conn_finished : 1; | ||||||
| 	struct	sock *tcp_sock;	    /* tcp sk backpointer */ | 	struct	sock *tcp_sock;	    /* tcp sk backpointer */ | ||||||
| 	struct	sock *conn;	    /* parent mptcp_sock */ | 	struct	sock *conn;	    /* parent mptcp_sock */ | ||||||
|  | 	const	struct inet_connection_sock_af_ops *icsk_af_ops; | ||||||
| 	struct	rcu_head rcu; | 	struct	rcu_head rcu; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | @ -74,4 +102,14 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) | ||||||
| void mptcp_subflow_init(void); | void mptcp_subflow_init(void); | ||||||
| int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock); | int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock); | ||||||
| 
 | 
 | ||||||
|  | extern const struct inet_connection_sock_af_ops ipv4_specific; | ||||||
|  | #if IS_ENABLED(CONFIG_MPTCP_IPV6) | ||||||
|  | extern const struct inet_connection_sock_af_ops ipv6_specific; | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | void mptcp_get_options(const struct sk_buff *skb, | ||||||
|  | 		       struct tcp_options_received *opt_rx); | ||||||
|  | 
 | ||||||
|  | void mptcp_finish_connect(struct sock *sk); | ||||||
|  | 
 | ||||||
| #endif /* __MPTCP_PROTOCOL_H */ | #endif /* __MPTCP_PROTOCOL_H */ | ||||||
|  |  | ||||||
|  | @ -12,9 +12,188 @@ | ||||||
| #include <net/inet_hashtables.h> | #include <net/inet_hashtables.h> | ||||||
| #include <net/protocol.h> | #include <net/protocol.h> | ||||||
| #include <net/tcp.h> | #include <net/tcp.h> | ||||||
|  | #if IS_ENABLED(CONFIG_MPTCP_IPV6) | ||||||
|  | #include <net/ip6_route.h> | ||||||
|  | #endif | ||||||
| #include <net/mptcp.h> | #include <net/mptcp.h> | ||||||
| #include "protocol.h" | #include "protocol.h" | ||||||
| 
 | 
 | ||||||
|  | static void subflow_init_req(struct request_sock *req, | ||||||
|  | 			     const struct sock *sk_listener, | ||||||
|  | 			     struct sk_buff *skb) | ||||||
|  | { | ||||||
|  | 	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); | ||||||
|  | 	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); | ||||||
|  | 	struct tcp_options_received rx_opt; | ||||||
|  | 
 | ||||||
|  | 	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); | ||||||
|  | 
 | ||||||
|  | 	memset(&rx_opt.mptcp, 0, sizeof(rx_opt.mptcp)); | ||||||
|  | 	mptcp_get_options(skb, &rx_opt); | ||||||
|  | 
 | ||||||
|  | 	subflow_req->mp_capable = 0; | ||||||
|  | 
 | ||||||
|  | #ifdef CONFIG_TCP_MD5SIG | ||||||
|  | 	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
 | ||||||
|  | 	 * TCP option space. | ||||||
|  | 	 */ | ||||||
|  | 	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info)) | ||||||
|  | 		return; | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | 	if (rx_opt.mptcp.mp_capable && listener->request_mptcp) { | ||||||
|  | 		subflow_req->mp_capable = 1; | ||||||
|  | 		subflow_req->remote_key = rx_opt.mptcp.sndr_key; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void subflow_v4_init_req(struct request_sock *req, | ||||||
|  | 				const struct sock *sk_listener, | ||||||
|  | 				struct sk_buff *skb) | ||||||
|  | { | ||||||
|  | 	tcp_rsk(req)->is_mptcp = 1; | ||||||
|  | 
 | ||||||
|  | 	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb); | ||||||
|  | 
 | ||||||
|  | 	subflow_init_req(req, sk_listener, skb); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #if IS_ENABLED(CONFIG_MPTCP_IPV6) | ||||||
|  | static void subflow_v6_init_req(struct request_sock *req, | ||||||
|  | 				const struct sock *sk_listener, | ||||||
|  | 				struct sk_buff *skb) | ||||||
|  | { | ||||||
|  | 	tcp_rsk(req)->is_mptcp = 1; | ||||||
|  | 
 | ||||||
|  | 	tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb); | ||||||
|  | 
 | ||||||
|  | 	subflow_init_req(req, sk_listener, skb); | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) | ||||||
|  | { | ||||||
|  | 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); | ||||||
|  | 
 | ||||||
|  | 	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); | ||||||
|  | 
 | ||||||
|  | 	if (subflow->conn && !subflow->conn_finished) { | ||||||
|  | 		pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk), | ||||||
|  | 			 subflow->remote_key); | ||||||
|  | 		mptcp_finish_connect(sk); | ||||||
|  | 		subflow->conn_finished = 1; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static struct request_sock_ops subflow_request_sock_ops; | ||||||
|  | static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops; | ||||||
|  | 
 | ||||||
|  | static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb) | ||||||
|  | { | ||||||
|  | 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); | ||||||
|  | 
 | ||||||
|  | 	pr_debug("subflow=%p", subflow); | ||||||
|  | 
 | ||||||
|  | 	/* Never answer to SYNs sent to broadcast or multicast */ | ||||||
|  | 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) | ||||||
|  | 		goto drop; | ||||||
|  | 
 | ||||||
|  | 	return tcp_conn_request(&subflow_request_sock_ops, | ||||||
|  | 				&subflow_request_sock_ipv4_ops, | ||||||
|  | 				sk, skb); | ||||||
|  | drop: | ||||||
|  | 	tcp_listendrop(sk); | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #if IS_ENABLED(CONFIG_MPTCP_IPV6) | ||||||
|  | static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops; | ||||||
|  | static struct inet_connection_sock_af_ops subflow_v6_specific; | ||||||
|  | static struct inet_connection_sock_af_ops subflow_v6m_specific; | ||||||
|  | 
 | ||||||
|  | static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) | ||||||
|  | { | ||||||
|  | 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); | ||||||
|  | 
 | ||||||
|  | 	pr_debug("subflow=%p", subflow); | ||||||
|  | 
 | ||||||
|  | 	if (skb->protocol == htons(ETH_P_IP)) | ||||||
|  | 		return subflow_v4_conn_request(sk, skb); | ||||||
|  | 
 | ||||||
|  | 	if (!ipv6_unicast_destination(skb)) | ||||||
|  | 		goto drop; | ||||||
|  | 
 | ||||||
|  | 	return tcp_conn_request(&subflow_request_sock_ops, | ||||||
|  | 				&subflow_request_sock_ipv6_ops, sk, skb); | ||||||
|  | 
 | ||||||
|  | drop: | ||||||
|  | 	tcp_listendrop(sk); | ||||||
|  | 	return 0; /* don't send reset */ | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | static struct sock *subflow_syn_recv_sock(const struct sock *sk, | ||||||
|  | 					  struct sk_buff *skb, | ||||||
|  | 					  struct request_sock *req, | ||||||
|  | 					  struct dst_entry *dst, | ||||||
|  | 					  struct request_sock *req_unhash, | ||||||
|  | 					  bool *own_req) | ||||||
|  | { | ||||||
|  | 	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk); | ||||||
|  | 	struct sock *child; | ||||||
|  | 
 | ||||||
|  | 	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); | ||||||
|  | 
 | ||||||
|  | 	/* if the sk is MP_CAPABLE, we already received the client key */ | ||||||
|  | 
 | ||||||
|  | 	child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, | ||||||
|  | 						     req_unhash, own_req); | ||||||
|  | 
 | ||||||
|  | 	if (child && *own_req) { | ||||||
|  | 		if (!mptcp_subflow_ctx(child)) { | ||||||
|  | 			pr_debug("Closing child socket"); | ||||||
|  | 			inet_sk_set_state(child, TCP_CLOSE); | ||||||
|  | 			sock_set_flag(child, SOCK_DEAD); | ||||||
|  | 			inet_csk_destroy_sock(child); | ||||||
|  | 			child = NULL; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return child; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static struct inet_connection_sock_af_ops subflow_specific; | ||||||
|  | 
 | ||||||
|  | static struct inet_connection_sock_af_ops * | ||||||
|  | subflow_default_af_ops(struct sock *sk) | ||||||
|  | { | ||||||
|  | #if IS_ENABLED(CONFIG_MPTCP_IPV6) | ||||||
|  | 	if (sk->sk_family == AF_INET6) | ||||||
|  | 		return &subflow_v6_specific; | ||||||
|  | #endif | ||||||
|  | 	return &subflow_specific; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void mptcp_handle_ipv6_mapped(struct sock *sk, bool mapped) | ||||||
|  | { | ||||||
|  | #if IS_ENABLED(CONFIG_MPTCP_IPV6) | ||||||
|  | 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); | ||||||
|  | 	struct inet_connection_sock *icsk = inet_csk(sk); | ||||||
|  | 	struct inet_connection_sock_af_ops *target; | ||||||
|  | 
 | ||||||
|  | 	target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk); | ||||||
|  | 
 | ||||||
|  | 	pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d", | ||||||
|  | 	         subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped); | ||||||
|  | 
 | ||||||
|  | 	if (likely(icsk->icsk_af_ops == target)) | ||||||
|  | 		return; | ||||||
|  | 
 | ||||||
|  | 	subflow->icsk_af_ops = icsk->icsk_af_ops; | ||||||
|  | 	icsk->icsk_af_ops = target; | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  | 
 | ||||||
| int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) | int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) | ||||||
| { | { | ||||||
| 	struct mptcp_subflow_context *subflow; | 	struct mptcp_subflow_context *subflow; | ||||||
|  | @ -22,7 +201,8 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) | ||||||
| 	struct socket *sf; | 	struct socket *sf; | ||||||
| 	int err; | 	int err; | ||||||
| 
 | 
 | ||||||
| 	err = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sf); | 	err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP, | ||||||
|  | 			       &sf); | ||||||
| 	if (err) | 	if (err) | ||||||
| 		return err; | 		return err; | ||||||
| 
 | 
 | ||||||
|  | @ -60,6 +240,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 
 | 
 | ||||||
| 	rcu_assign_pointer(icsk->icsk_ulp_data, ctx); | 	rcu_assign_pointer(icsk->icsk_ulp_data, ctx); | ||||||
|  | 	INIT_LIST_HEAD(&ctx->node); | ||||||
| 
 | 
 | ||||||
| 	pr_debug("subflow=%p", ctx); | 	pr_debug("subflow=%p", ctx); | ||||||
| 
 | 
 | ||||||
|  | @ -70,6 +251,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, | ||||||
| 
 | 
 | ||||||
| static int subflow_ulp_init(struct sock *sk) | static int subflow_ulp_init(struct sock *sk) | ||||||
| { | { | ||||||
|  | 	struct inet_connection_sock *icsk = inet_csk(sk); | ||||||
| 	struct mptcp_subflow_context *ctx; | 	struct mptcp_subflow_context *ctx; | ||||||
| 	struct tcp_sock *tp = tcp_sk(sk); | 	struct tcp_sock *tp = tcp_sk(sk); | ||||||
| 	int err = 0; | 	int err = 0; | ||||||
|  | @ -91,6 +273,8 @@ static int subflow_ulp_init(struct sock *sk) | ||||||
| 	pr_debug("subflow=%p, family=%d", ctx, sk->sk_family); | 	pr_debug("subflow=%p, family=%d", ctx, sk->sk_family); | ||||||
| 
 | 
 | ||||||
| 	tp->is_mptcp = 1; | 	tp->is_mptcp = 1; | ||||||
|  | 	ctx->icsk_af_ops = icsk->icsk_af_ops; | ||||||
|  | 	icsk->icsk_af_ops = subflow_default_af_ops(sk); | ||||||
| out: | out: | ||||||
| 	return err; | 	return err; | ||||||
| } | } | ||||||
|  | @ -105,15 +289,97 @@ static void subflow_ulp_release(struct sock *sk) | ||||||
| 	kfree_rcu(ctx, rcu); | 	kfree_rcu(ctx, rcu); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static void subflow_ulp_fallback(struct sock *sk) | ||||||
|  | { | ||||||
|  | 	struct inet_connection_sock *icsk = inet_csk(sk); | ||||||
|  | 
 | ||||||
|  | 	icsk->icsk_ulp_ops = NULL; | ||||||
|  | 	rcu_assign_pointer(icsk->icsk_ulp_data, NULL); | ||||||
|  | 	tcp_sk(sk)->is_mptcp = 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void subflow_ulp_clone(const struct request_sock *req, | ||||||
|  | 			      struct sock *newsk, | ||||||
|  | 			      const gfp_t priority) | ||||||
|  | { | ||||||
|  | 	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); | ||||||
|  | 	struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk); | ||||||
|  | 	struct mptcp_subflow_context *new_ctx; | ||||||
|  | 
 | ||||||
|  | 	if (!subflow_req->mp_capable) { | ||||||
|  | 		subflow_ulp_fallback(newsk); | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	new_ctx = subflow_create_ctx(newsk, priority); | ||||||
|  | 	if (new_ctx == NULL) { | ||||||
|  | 		subflow_ulp_fallback(newsk); | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	new_ctx->conn_finished = 1; | ||||||
|  | 	new_ctx->icsk_af_ops = old_ctx->icsk_af_ops; | ||||||
|  | 	new_ctx->mp_capable = 1; | ||||||
|  | 	new_ctx->fourth_ack = 1; | ||||||
|  | 	new_ctx->remote_key = subflow_req->remote_key; | ||||||
|  | 	new_ctx->local_key = subflow_req->local_key; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = { | static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = { | ||||||
| 	.name		= "mptcp", | 	.name		= "mptcp", | ||||||
| 	.owner		= THIS_MODULE, | 	.owner		= THIS_MODULE, | ||||||
| 	.init		= subflow_ulp_init, | 	.init		= subflow_ulp_init, | ||||||
| 	.release	= subflow_ulp_release, | 	.release	= subflow_ulp_release, | ||||||
|  | 	.clone		= subflow_ulp_clone, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | static int subflow_ops_init(struct request_sock_ops *subflow_ops) | ||||||
|  | { | ||||||
|  | 	subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock); | ||||||
|  | 	subflow_ops->slab_name = "request_sock_subflow"; | ||||||
|  | 
 | ||||||
|  | 	subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name, | ||||||
|  | 					      subflow_ops->obj_size, 0, | ||||||
|  | 					      SLAB_ACCOUNT | | ||||||
|  | 					      SLAB_TYPESAFE_BY_RCU, | ||||||
|  | 					      NULL); | ||||||
|  | 	if (!subflow_ops->slab) | ||||||
|  | 		return -ENOMEM; | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void mptcp_subflow_init(void) | void mptcp_subflow_init(void) | ||||||
| { | { | ||||||
|  | 	subflow_request_sock_ops = tcp_request_sock_ops; | ||||||
|  | 	if (subflow_ops_init(&subflow_request_sock_ops) != 0) | ||||||
|  | 		panic("MPTCP: failed to init subflow request sock ops\n"); | ||||||
|  | 
 | ||||||
|  | 	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops; | ||||||
|  | 	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req; | ||||||
|  | 
 | ||||||
|  | 	subflow_specific = ipv4_specific; | ||||||
|  | 	subflow_specific.conn_request = subflow_v4_conn_request; | ||||||
|  | 	subflow_specific.syn_recv_sock = subflow_syn_recv_sock; | ||||||
|  | 	subflow_specific.sk_rx_dst_set = subflow_finish_connect; | ||||||
|  | 
 | ||||||
|  | #if IS_ENABLED(CONFIG_MPTCP_IPV6) | ||||||
|  | 	subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops; | ||||||
|  | 	subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req; | ||||||
|  | 
 | ||||||
|  | 	subflow_v6_specific = ipv6_specific; | ||||||
|  | 	subflow_v6_specific.conn_request = subflow_v6_conn_request; | ||||||
|  | 	subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock; | ||||||
|  | 	subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect; | ||||||
|  | 
 | ||||||
|  | 	subflow_v6m_specific = subflow_v6_specific; | ||||||
|  | 	subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit; | ||||||
|  | 	subflow_v6m_specific.send_check = ipv4_specific.send_check; | ||||||
|  | 	subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len; | ||||||
|  | 	subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced; | ||||||
|  | 	subflow_v6m_specific.net_frag_header_len = 0; | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| 	if (tcp_register_ulp(&subflow_ulp_ops) != 0) | 	if (tcp_register_ulp(&subflow_ulp_ops) != 0) | ||||||
| 		panic("MPTCP: failed to register subflows to ULP\n"); | 		panic("MPTCP: failed to register subflows to ULP\n"); | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Peter Krystad
						Peter Krystad