mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	net, sk_msg: Annotate lockless access to sk_prot on clone
sk_msg and ULP frameworks override protocol callbacks pointer in
sk->sk_prot, while tcp accesses it locklessly when cloning the listening
socket, that is with neither sk_lock nor sk_callback_lock held.
Once we enable use of listening sockets with sockmap (and hence sk_msg),
there will be shared access to sk->sk_prot if the socket is cloned
while it is being inserted into or deleted from the sockmap on another CPU:
Read side:
tcp_v4_rcv
  sk = __inet_lookup_skb(...)
  tcp_check_req(sk)
    inet_csk(sk)->icsk_af_ops->syn_recv_sock
      tcp_v4_syn_recv_sock
        tcp_create_openreq_child
          inet_csk_clone_lock
            sk_clone_lock
              READ_ONCE(sk->sk_prot)
Write side:
sock_map_ops->map_update_elem
  sock_map_update_elem
    sock_map_update_common
      sock_map_link_no_progs
        tcp_bpf_init
          tcp_bpf_update_sk_prot
            sk_psock_update_proto
              WRITE_ONCE(sk->sk_prot, ops)
sock_map_ops->map_delete_elem
  sock_map_delete_elem
    __sock_map_delete
      sock_map_unref
        sk_psock_put
          sk_psock_drop
            sk_psock_restore_proto
              tcp_update_ulp
                WRITE_ONCE(sk->sk_prot, proto)
Mark the shared access with READ_ONCE/WRITE_ONCE annotations.
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200218171023.844439-2-jakub@cloudflare.com
			
			
This commit is contained in:
		
							parent
							
								
									e42da4c62a
								
							
						
					
					
						commit
						b8e202d1d1
					
				
					 5 changed files with 14 additions and 7 deletions
				
			
		| 
						 | 
				
			
			@ -352,7 +352,8 @@ static inline void sk_psock_update_proto(struct sock *sk,
 | 
			
		|||
	psock->saved_write_space = sk->sk_write_space;
 | 
			
		||||
 | 
			
		||||
	psock->sk_proto = sk->sk_prot;
 | 
			
		||||
	sk->sk_prot = ops;
 | 
			
		||||
	/* Pairs with lockless read in sk_clone_lock() */
 | 
			
		||||
	WRITE_ONCE(sk->sk_prot, ops);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void sk_psock_restore_proto(struct sock *sk,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1572,13 +1572,14 @@ static inline void sock_lock_init(struct sock *sk)
 | 
			
		|||
 */
 | 
			
		||||
static void sock_copy(struct sock *nsk, const struct sock *osk)
 | 
			
		||||
{
 | 
			
		||||
	const struct proto *prot = READ_ONCE(osk->sk_prot);
 | 
			
		||||
#ifdef CONFIG_SECURITY_NETWORK
 | 
			
		||||
	void *sptr = nsk->sk_security;
 | 
			
		||||
#endif
 | 
			
		||||
	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
 | 
			
		||||
 | 
			
		||||
	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
 | 
			
		||||
	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
 | 
			
		||||
	       prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_SECURITY_NETWORK
 | 
			
		||||
	nsk->sk_security = sptr;
 | 
			
		||||
| 
						 | 
				
			
			@ -1792,16 +1793,17 @@ static void sk_init_common(struct sock *sk)
 | 
			
		|||
 */
 | 
			
		||||
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 | 
			
		||||
{
 | 
			
		||||
	struct proto *prot = READ_ONCE(sk->sk_prot);
 | 
			
		||||
	struct sock *newsk;
 | 
			
		||||
	bool is_charged = true;
 | 
			
		||||
 | 
			
		||||
	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
 | 
			
		||||
	newsk = sk_prot_alloc(prot, priority, sk->sk_family);
 | 
			
		||||
	if (newsk != NULL) {
 | 
			
		||||
		struct sk_filter *filter;
 | 
			
		||||
 | 
			
		||||
		sock_copy(newsk, sk);
 | 
			
		||||
 | 
			
		||||
		newsk->sk_prot_creator = sk->sk_prot;
 | 
			
		||||
		newsk->sk_prot_creator = prot;
 | 
			
		||||
 | 
			
		||||
		/* SANITY */
 | 
			
		||||
		if (likely(newsk->sk_net_refcnt))
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -645,8 +645,10 @@ static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock)
 | 
			
		|||
	/* Reinit occurs when program types change e.g. TCP_BPF_TX is removed
 | 
			
		||||
	 * or added requiring sk_prot hook updates. We keep original saved
 | 
			
		||||
	 * hooks in this case.
 | 
			
		||||
	 *
 | 
			
		||||
	 * Pairs with lockless read in sk_clone_lock().
 | 
			
		||||
	 */
 | 
			
		||||
	sk->sk_prot = &tcp_bpf_prots[family][config];
 | 
			
		||||
	WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int tcp_bpf_assert_proto_ops(struct proto *ops)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -106,7 +106,8 @@ void tcp_update_ulp(struct sock *sk, struct proto *proto,
 | 
			
		|||
 | 
			
		||||
	if (!icsk->icsk_ulp_ops) {
 | 
			
		||||
		sk->sk_write_space = write_space;
 | 
			
		||||
		sk->sk_prot = proto;
 | 
			
		||||
		/* Pairs with lockless read in sk_clone_lock() */
 | 
			
		||||
		WRITE_ONCE(sk->sk_prot, proto);
 | 
			
		||||
		return;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -742,7 +742,8 @@ static void tls_update(struct sock *sk, struct proto *p,
 | 
			
		|||
		ctx->sk_write_space = write_space;
 | 
			
		||||
		ctx->sk_proto = p;
 | 
			
		||||
	} else {
 | 
			
		||||
		sk->sk_prot = p;
 | 
			
		||||
		/* Pairs with lockless read in sk_clone_lock(). */
 | 
			
		||||
		WRITE_ONCE(sk->sk_prot, p);
 | 
			
		||||
		sk->sk_write_space = write_space;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue