	net: add real socket cookies
A long-standing problem in netlink socket dumps is the use of kernel socket addresses as cookies:

1) It is a security concern.
2) Sockets can be reused quite quickly, so there is no guarantee a cookie is used only once and identifies a single flow.
3) The request sock, established sock, and timewait sock for a given flow have different cookies.

Part of our effort to bring better TCP statistics requires switching to a different allocator. In this patch, I chose to use a per-network-namespace 64-bit generator, and to use it only when a socket needs to be dumped to netlink. (This might be refined later if needed.)

Note that I took care to carry cookies from the request sock to the established sock and then to the timewait sock.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Eric Salo <salo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
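The 64-bit value still has to travel over the existing inet_diag netlink ABI, which carries cookies as two __u32 words (word 0 holds the low half, word 1 the high half, and INET_DIAG_NOCOOKIE in both words acts as a wildcard). Below is a minimal user-space sketch of that mapping, mirroring sock_diag_save_cookie() and sock_diag_check_cookie() from the diff; it is illustrative only and not part of the patch.

/*
 * Illustrative user-space sketch (not kernel code): how the new 64-bit
 * cookie maps onto the two __u32 words used by the inet_diag ABI.
 */
#include <stdint.h>
#include <stdio.h>

#define INET_DIAG_NOCOOKIE (~0U)	/* same value as in linux/inet_diag.h */

static void save_cookie(uint64_t res, uint32_t cookie[2])
{
	cookie[0] = (uint32_t)res;		/* low 32 bits */
	cookie[1] = (uint32_t)(res >> 32);	/* high 32 bits */
}

/* Returns 0 on match or wildcard, -1 (kernel: -ESTALE) on mismatch. */
static int check_cookie(uint64_t res, const uint32_t cookie[2])
{
	if (cookie[0] == INET_DIAG_NOCOOKIE && cookie[1] == INET_DIAG_NOCOOKIE)
		return 0;
	if ((uint32_t)res != cookie[0] || (uint32_t)(res >> 32) != cookie[1])
		return -1;
	return 0;
}

int main(void)
{
	uint32_t cookie[2];

	save_cookie(0x1122334455667788ULL, cookie);
	printf("words: %#x %#x match: %d\n", cookie[0], cookie[1],
	       check_cookie(0x1122334455667788ULL, cookie));
	return 0;
}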
parent 654eff4516
commit 33cf7c90fe

13 changed files with 55 additions and 17 deletions
@@ -19,8 +19,8 @@ void sock_diag_unregister(const struct sock_diag_handler *h);
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 
-int sock_diag_check_cookie(void *sk, const __u32 *cookie);
-void sock_diag_save_cookie(void *sk, __u32 *cookie);
+int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie);
+void sock_diag_save_cookie(struct sock *sk, __u32 *cookie);
 
 int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);
 int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
@@ -77,6 +77,8 @@ struct inet_request_sock {
 #define ir_v6_rmt_addr		req.__req_common.skc_v6_daddr
 #define ir_v6_loc_addr		req.__req_common.skc_v6_rcv_saddr
 #define ir_iif			req.__req_common.skc_bound_dev_if
+#define ir_cookie		req.__req_common.skc_cookie
+#define ireq_net		req.__req_common.skc_net
 
 	kmemcheck_bitfield_begin(flags);
 	u16			snd_wscale : 4,
@@ -122,6 +122,7 @@ struct inet_timewait_sock {
 #define tw_v6_rcv_saddr    	__tw_common.skc_v6_rcv_saddr
 #define tw_dport		__tw_common.skc_dport
 #define tw_num			__tw_common.skc_num
+#define tw_cookie		__tw_common.skc_cookie
 
 	int			tw_timeout;
 	volatile unsigned char	tw_substate;
@@ -56,6 +56,8 @@ struct net {
 #endif
 	spinlock_t		rules_mod_lock;
 
+	atomic64_t		cookie_gen;
+
 	struct list_head	list;		/* list of network namespaces */
 	struct list_head	cleanup_list;	/* namespaces on death row */
 	struct list_head	exit_list;	/* Use only net_mutex */
@@ -199,6 +199,8 @@ struct sock_common {
 	struct in6_addr		skc_v6_rcv_saddr;
 #endif
 
+	atomic64_t		skc_cookie;
+
 	/*
 	 * fields between dontcopy_begin/dontcopy_end
 	 * are not copied in sock_copy()
@@ -329,6 +331,7 @@ struct sock {
 #define sk_net			__sk_common.skc_net
 #define sk_v6_daddr		__sk_common.skc_v6_daddr
 #define sk_v6_rcv_saddr	__sk_common.skc_v6_rcv_saddr
+#define sk_cookie		__sk_common.skc_cookie
 
 	socket_lock_t		sk_lock;
 	struct sk_buff_head	sk_receive_queue;
@@ -1538,6 +1538,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		newsk->sk_err	   = 0;
 		newsk->sk_priority = 0;
 		newsk->sk_incoming_cpu = raw_smp_processor_id();
+		atomic64_set(&newsk->sk_cookie, 0);
 		/*
 		 * Before updating sk_refcnt, we must commit prior changes to memory
 		 * (Documentation/RCU/rculist_nulls.txt for details)
@@ -13,22 +13,39 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
 static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
 static DEFINE_MUTEX(sock_diag_table_mutex);
 
-int sock_diag_check_cookie(void *sk, const __u32 *cookie)
+static u64 sock_gen_cookie(struct sock *sk)
 {
-	if ((cookie[0] != INET_DIAG_NOCOOKIE ||
-	     cookie[1] != INET_DIAG_NOCOOKIE) &&
-	    ((u32)(unsigned long)sk != cookie[0] ||
-	     (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1]))
+	while (1) {
+		u64 res = atomic64_read(&sk->sk_cookie);
+
+		if (res)
+			return res;
+		res = atomic64_inc_return(&sock_net(sk)->cookie_gen);
+		atomic64_cmpxchg(&sk->sk_cookie, 0, res);
+	}
+}
+
+int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie)
+{
+	u64 res;
+
+	if (cookie[0] == INET_DIAG_NOCOOKIE && cookie[1] == INET_DIAG_NOCOOKIE)
+		return 0;
+
+	res = sock_gen_cookie(sk);
+	if ((u32)res != cookie[0] || (u32)(res >> 32) != cookie[1])
 		return -ESTALE;
-	else
-	return 0;
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(sock_diag_check_cookie);
 
-void sock_diag_save_cookie(void *sk, __u32 *cookie)
+void sock_diag_save_cookie(struct sock *sk, __u32 *cookie)
 {
-	cookie[0] = (u32)(unsigned long)sk;
-	cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
+	u64 res = sock_gen_cookie(sk);
+
+	cookie[0] = (u32)res;
+	cookie[1] = (u32)(res >> 32);
 }
 EXPORT_SYMBOL_GPL(sock_diag_save_cookie);
 
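The per-namespace cookie_gen counter may be bumped more than once when several dumpers race on the same socket, but the cmpxchg-from-zero guarantees only the first value is ever published in sk_cookie, so every caller sees the same cookie. A small stand-alone model of that behaviour (C11 atomics and pthreads, not kernel code; all names below are illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t cookie_gen;	/* models net->cookie_gen        */
static _Atomic uint64_t sk_cookie;	/* models sk->sk_cookie (0 = unset) */

static uint64_t gen_cookie(void)
{
	for (;;) {
		uint64_t res = atomic_load(&sk_cookie);
		uint64_t zero = 0;

		if (res)
			return res;
		/* fetch_add returns the old value; +1 models atomic64_inc_return() */
		res = atomic_fetch_add(&cookie_gen, 1) + 1;
		/* only the first writer publishes its value */
		atomic_compare_exchange_strong(&sk_cookie, &zero, res);
	}
}

static void *dumper(void *arg)
{
	*(uint64_t *)arg = gen_cookie();
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	uint64_t got[4];
	int i;

	for (i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, dumper, &got[i]);
	for (i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	for (i = 0; i < 4; i++)	/* all four values are identical */
		printf("dumper %d saw cookie %llu\n", i,
		       (unsigned long long)got[i]);
	return 0;
}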
@@ -641,6 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
 	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
+	ireq->ireq_net = sock_net(sk);
+	atomic64_set(&ireq->ir_cookie, 0);
 
 	/*
 	 * Step 3: Process LISTEN state
@@ -678,6 +678,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 		newsk->sk_write_space = sk_stream_write_space;
 
 		newsk->sk_mark = inet_rsk(req)->ir_mark;
+		atomic64_set(&newsk->sk_cookie,
+			     atomic64_read(&inet_rsk(req)->ir_cookie));
 
 		newicsk->icsk_retransmits = 0;
 		newicsk->icsk_backoff	  = 0;
@@ -221,12 +221,13 @@ static int inet_csk_diag_fill(struct sock *sk,
 				 user_ns, portid, seq, nlmsg_flags, unlh);
 }
 
-static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
+static int inet_twsk_diag_fill(struct sock *sk,
 			       struct sk_buff *skb,
 			       const struct inet_diag_req_v2 *req,
 			       u32 portid, u32 seq, u16 nlmsg_flags,
 			       const struct nlmsghdr *unlh)
 {
+	struct inet_timewait_sock *tw = inet_twsk(sk);
 	struct inet_diag_msg *r;
 	struct nlmsghdr *nlh;
 	s32 tmo;
@@ -247,7 +248,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 	r->idiag_retrans      = 0;
 
 	r->id.idiag_if	      = tw->tw_bound_dev_if;
-	sock_diag_save_cookie(tw, r->id.idiag_cookie);
+	sock_diag_save_cookie(sk, r->id.idiag_cookie);
 
 	r->id.idiag_sport     = tw->tw_sport;
 	r->id.idiag_dport     = tw->tw_dport;
@@ -283,7 +284,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 			const struct nlmsghdr *unlh)
 {
 	if (sk->sk_state == TCP_TIME_WAIT)
-		return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq,
+		return inet_twsk_diag_fill(sk, skb, r, portid, seq,
 					   nlmsg_flags, unlh);
 
 	return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
@@ -675,7 +676,7 @@ static int inet_twsk_diag_dump(struct sock *sk,
 	if (!inet_diag_bc_sk(bc, sk))
 		return 0;
 
-	return inet_twsk_diag_fill(inet_twsk(sk), skb, r,
+	return inet_twsk_diag_fill(sk, skb, r,
 				   NETLINK_CB(cb->skb).portid,
 				   cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
 }
@@ -734,7 +735,10 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 	r->idiag_retrans = req->num_retrans;
 
 	r->id.idiag_if = sk->sk_bound_dev_if;
-	sock_diag_save_cookie(req, r->id.idiag_cookie);
+
+	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
+		     offsetof(struct sock, sk_cookie));
+	sock_diag_save_cookie((struct sock *)ireq, r->id.idiag_cookie);
 
 	tmo = req->expires - jiffies;
 	if (tmo < 0)
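A request sock is not a full struct sock, so the cast above is only valid because ir_cookie (via __req_common) sits at the same offset as sk_cookie does in struct sock; the BUILD_BUG_ON pins that layout assumption at compile time. A user-space analogue of the same trick, with hypothetical structure names and relying on the common initial header exactly as the kernel cast does:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct common  { uint64_t cookie; };			/* models struct sock_common      */
struct full    { struct common c; int state; };		/* models struct sock             */
struct request { struct common c; int retries; };	/* models struct inet_request_sock */

/* Compile-time equivalent of the BUILD_BUG_ON in the hunk above. */
static_assert(offsetof(struct full, c.cookie) ==
	      offsetof(struct request, c.cookie),
	      "cookie must sit at the same offset in both layouts");

/* A helper written against 'struct full'... */
static uint64_t get_cookie(const struct full *f)
{
	return f->c.cookie;
}

int main(void)
{
	struct request r = { .c = { .cookie = 42 } };

	/* ...can be handed a request through the same kind of cast. */
	return get_cookie((const struct full *)&r) == 42 ? 0 : 1;
}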
@@ -195,6 +195,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
 		tw->tw_ipv6only	    = 0;
 		tw->tw_transparent  = inet->transparent;
 		tw->tw_prot	    = sk->sk_prot_creator;
+		atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
 		twsk_net_set(tw, hold_net(sock_net(sk)));
 		/*
 		 * Because we use RCU lookups, we should not set tw_refcnt
@@ -346,6 +346,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 	treq->snt_synack	= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
 	treq->listener		= NULL;
+	ireq->ireq_net		= sock_net(sk);
 
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -5965,6 +5965,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb, sk);
+	inet_rsk(req)->ireq_net = sock_net(sk);
+	atomic64_set(&inet_rsk(req)->ir_cookie, 0);
 
 	af_ops->init_req(req, sk, skb);
 