forked from mirrors/linux
		
	tcp: Add mark for TIMEWAIT sockets
This version has some suggestions by Eric Dumazet: - Use a local variable for the mark in IPv6 instead of ctl_sk to avoid SMP races. - Use the more elegant "IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark" statement. - Factorize code as sk_fullsock() check is not necessary. Aidan McGurn from Openwave Mobility systems reported the following bug: "Marked routing is broken on customer deployment. Its effects are large increase in Uplink retransmissions caused by the client never receiving the final ACK to their FINACK - this ACK misses the mark and routes out of the incorrect route." Currently marks are added to sk_buffs for replies when the "fwmark_reflect" sysctl is enabled. But not for TW sockets that had sk->sk_mark set via setsockopt(SO_MARK..). Fix this in IPv4/v6 by adding tw->tw_mark for TIME_WAIT sockets. Copy the original sk->sk_mark in __inet_twsk_hashdance() to the new tw->tw_mark location. Then propagate this so that the skb gets sent with the correct mark. Do the same for resets. Give the "fwmark_reflect" sysctl precedence over sk->sk_mark so that netfilter rules are still honored. Signed-off-by: Jon Maxwell <jmaxwell37@gmail.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									03bdfc001c
								
							
						
					
					
						commit
						0048369055
					
				
					 5 changed files with 22 additions and 4 deletions
				
			
		| 
						 | 
				
			
			@ -62,6 +62,7 @@ struct inet_timewait_sock {
 | 
			
		|||
#define tw_dr			__tw_common.skc_tw_dr
 | 
			
		||||
 | 
			
		||||
	int			tw_timeout;
 | 
			
		||||
	__u32			tw_mark;
 | 
			
		||||
	volatile unsigned char	tw_substate;
 | 
			
		||||
	unsigned char		tw_rcv_wscale;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1561,7 +1561,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 | 
			
		|||
		oif = skb->skb_iif;
 | 
			
		||||
 | 
			
		||||
	flowi4_init_output(&fl4, oif,
 | 
			
		||||
			   IP4_REPLY_MARK(net, skb->mark),
 | 
			
		||||
			   IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark,
 | 
			
		||||
			   RT_TOS(arg->tos),
 | 
			
		||||
			   RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
 | 
			
		||||
			   ip_reply_arg_flowi_flags(arg),
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -621,6 +621,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 | 
			
		|||
	struct sock *sk1 = NULL;
 | 
			
		||||
#endif
 | 
			
		||||
	struct net *net;
 | 
			
		||||
	struct sock *ctl_sk;
 | 
			
		||||
 | 
			
		||||
	/* Never send a reset in response to a reset. */
 | 
			
		||||
	if (th->rst)
 | 
			
		||||
| 
						 | 
				
			
			@ -723,11 +724,16 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 | 
			
		|||
	arg.tos = ip_hdr(skb)->tos;
 | 
			
		||||
	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
 | 
			
		||||
	local_bh_disable();
 | 
			
		||||
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 | 
			
		||||
	ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
 | 
			
		||||
	if (sk)
 | 
			
		||||
		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
 | 
			
		||||
				   inet_twsk(sk)->tw_mark : sk->sk_mark;
 | 
			
		||||
	ip_send_unicast_reply(ctl_sk,
 | 
			
		||||
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
 | 
			
		||||
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 | 
			
		||||
			      &arg, arg.iov[0].iov_len);
 | 
			
		||||
 | 
			
		||||
	ctl_sk->sk_mark = 0;
 | 
			
		||||
	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 | 
			
		||||
	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
 | 
			
		||||
	local_bh_enable();
 | 
			
		||||
| 
						 | 
				
			
			@ -759,6 +765,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
 | 
			
		|||
	} rep;
 | 
			
		||||
	struct net *net = sock_net(sk);
 | 
			
		||||
	struct ip_reply_arg arg;
 | 
			
		||||
	struct sock *ctl_sk;
 | 
			
		||||
 | 
			
		||||
	memset(&rep.th, 0, sizeof(struct tcphdr));
 | 
			
		||||
	memset(&arg, 0, sizeof(arg));
 | 
			
		||||
| 
						 | 
				
			
			@ -809,11 +816,16 @@ static void tcp_v4_send_ack(const struct sock *sk,
 | 
			
		|||
	arg.tos = tos;
 | 
			
		||||
	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
 | 
			
		||||
	local_bh_disable();
 | 
			
		||||
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 | 
			
		||||
	ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
 | 
			
		||||
	if (sk)
 | 
			
		||||
		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
 | 
			
		||||
				   inet_twsk(sk)->tw_mark : sk->sk_mark;
 | 
			
		||||
	ip_send_unicast_reply(ctl_sk,
 | 
			
		||||
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
 | 
			
		||||
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 | 
			
		||||
			      &arg, arg.iov[0].iov_len);
 | 
			
		||||
 | 
			
		||||
	ctl_sk->sk_mark = 0;
 | 
			
		||||
	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 | 
			
		||||
	local_bh_enable();
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -263,6 +263,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 | 
			
		|||
		struct inet_sock *inet = inet_sk(sk);
 | 
			
		||||
 | 
			
		||||
		tw->tw_transparent	= inet->transparent;
 | 
			
		||||
		tw->tw_mark		= sk->sk_mark;
 | 
			
		||||
		tw->tw_rcv_wscale	= tp->rx_opt.rcv_wscale;
 | 
			
		||||
		tcptw->tw_rcv_nxt	= tp->rcv_nxt;
 | 
			
		||||
		tcptw->tw_snd_nxt	= tp->snd_nxt;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -803,6 +803,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
 | 
			
		|||
	unsigned int tot_len = sizeof(struct tcphdr);
 | 
			
		||||
	struct dst_entry *dst;
 | 
			
		||||
	__be32 *topt;
 | 
			
		||||
	__u32 mark = 0;
 | 
			
		||||
 | 
			
		||||
	if (tsecr)
 | 
			
		||||
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
 | 
			
		||||
| 
						 | 
				
			
			@ -871,7 +872,10 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
 | 
			
		|||
		fl6.flowi6_oif = oif;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
 | 
			
		||||
	if (sk)
 | 
			
		||||
		mark = (sk->sk_state == TCP_TIME_WAIT) ?
 | 
			
		||||
			inet_twsk(sk)->tw_mark : sk->sk_mark;
 | 
			
		||||
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
 | 
			
		||||
	fl6.fl6_dport = t1->dest;
 | 
			
		||||
	fl6.fl6_sport = t1->source;
 | 
			
		||||
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue