mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	netlink: Fix wraparounds of sk->sk_rmem_alloc.
Netlink has this pattern in some places

  if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
  	atomic_add(skb->truesize, &sk->sk_rmem_alloc);

, which has the same problem fixed by commit 5a465a0da1 ("udp: Fix multiple wraparounds of sk->sk_rmem_alloc.").

For example, if we set INT_MAX to SO_RCVBUFFORCE, the condition is always false as the two operands are of int.

Then, a single socket can eat as many skb as possible until OOM happens, and we can see multiple wraparounds of sk->sk_rmem_alloc.

Let's fix it by using atomic_add_return() and comparing the two variables as unsigned int.

Before:
  [root@fedora ~]# ss -f netlink
  Recv-Q      Send-Q Local Address:Port                Peer Address:Port
  -1668710080 0               rtnl:nl_wraparound/293               *

After:
  [root@fedora ~]# ss -f netlink
  Recv-Q     Send-Q Local Address:Port                Peer Address:Port
  2147483072 0               rtnl:nl_wraparound/290               *
  ^
  `--- INT_MAX - 576

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Reported-by: Jason Baron <jbaron@akamai.com>
Closes: https://lore.kernel.org/netdev/cover.1750285100.git.jbaron@akamai.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250704054824.1580222-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
		
							parent
							
								
									4e2bba30b1
								
							
						
					
					
						commit
						ae8f160e7e
					
				
					 1 changed files with 53 additions and 36 deletions
				
			
		| 
						 | 
				
			
			@ -387,7 +387,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 | 
			
		|||
	WARN_ON(skb->sk != NULL);
 | 
			
		||||
	skb->sk = sk;
 | 
			
		||||
	skb->destructor = netlink_skb_destructor;
 | 
			
		||||
	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
 | 
			
		||||
	sk_mem_charge(sk, skb->truesize);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1212,41 +1211,48 @@ struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
 | 
			
		|||
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 | 
			
		||||
		      long *timeo, struct sock *ssk)
 | 
			
		||||
{
 | 
			
		||||
	DECLARE_WAITQUEUE(wait, current);
 | 
			
		||||
	struct netlink_sock *nlk;
 | 
			
		||||
	unsigned int rmem;
 | 
			
		||||
 | 
			
		||||
	nlk = nlk_sk(sk);
 | 
			
		||||
	rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
 | 
			
		||||
 | 
			
		||||
	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
 | 
			
		||||
	     test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
 | 
			
		||||
		DECLARE_WAITQUEUE(wait, current);
 | 
			
		||||
		if (!*timeo) {
 | 
			
		||||
			if (!ssk || netlink_is_kernel(ssk))
 | 
			
		||||
				netlink_overrun(sk);
 | 
			
		||||
			sock_put(sk);
 | 
			
		||||
			kfree_skb(skb);
 | 
			
		||||
			return -EAGAIN;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		__set_current_state(TASK_INTERRUPTIBLE);
 | 
			
		||||
		add_wait_queue(&nlk->wait, &wait);
 | 
			
		||||
 | 
			
		||||
		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
 | 
			
		||||
		     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
 | 
			
		||||
		    !sock_flag(sk, SOCK_DEAD))
 | 
			
		||||
			*timeo = schedule_timeout(*timeo);
 | 
			
		||||
 | 
			
		||||
		__set_current_state(TASK_RUNNING);
 | 
			
		||||
		remove_wait_queue(&nlk->wait, &wait);
 | 
			
		||||
		sock_put(sk);
 | 
			
		||||
 | 
			
		||||
		if (signal_pending(current)) {
 | 
			
		||||
			kfree_skb(skb);
 | 
			
		||||
			return sock_intr_errno(*timeo);
 | 
			
		||||
		}
 | 
			
		||||
		return 1;
 | 
			
		||||
	if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) &&
 | 
			
		||||
	    !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
 | 
			
		||||
		netlink_skb_set_owner_r(skb, sk);
 | 
			
		||||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
	netlink_skb_set_owner_r(skb, sk);
 | 
			
		||||
	return 0;
 | 
			
		||||
 | 
			
		||||
	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 | 
			
		||||
 | 
			
		||||
	if (!*timeo) {
 | 
			
		||||
		if (!ssk || netlink_is_kernel(ssk))
 | 
			
		||||
			netlink_overrun(sk);
 | 
			
		||||
		sock_put(sk);
 | 
			
		||||
		kfree_skb(skb);
 | 
			
		||||
		return -EAGAIN;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	__set_current_state(TASK_INTERRUPTIBLE);
 | 
			
		||||
	add_wait_queue(&nlk->wait, &wait);
 | 
			
		||||
	rmem = atomic_read(&sk->sk_rmem_alloc);
 | 
			
		||||
 | 
			
		||||
	if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) ||
 | 
			
		||||
	     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
 | 
			
		||||
	    !sock_flag(sk, SOCK_DEAD))
 | 
			
		||||
		*timeo = schedule_timeout(*timeo);
 | 
			
		||||
 | 
			
		||||
	__set_current_state(TASK_RUNNING);
 | 
			
		||||
	remove_wait_queue(&nlk->wait, &wait);
 | 
			
		||||
	sock_put(sk);
 | 
			
		||||
 | 
			
		||||
	if (signal_pending(current)) {
 | 
			
		||||
		kfree_skb(skb);
 | 
			
		||||
		return sock_intr_errno(*timeo);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
 | 
			
		||||
| 
						 | 
				
			
			@ -1307,6 +1313,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
 | 
			
		|||
	ret = -ECONNREFUSED;
 | 
			
		||||
	if (nlk->netlink_rcv != NULL) {
 | 
			
		||||
		ret = skb->len;
 | 
			
		||||
		atomic_add(skb->truesize, &sk->sk_rmem_alloc);
 | 
			
		||||
		netlink_skb_set_owner_r(skb, sk);
 | 
			
		||||
		NETLINK_CB(skb).sk = ssk;
 | 
			
		||||
		netlink_deliver_tap_kernel(sk, ssk, skb);
 | 
			
		||||
| 
						 | 
				
			
			@ -1383,13 +1390,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check);
 | 
			
		|||
static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
 | 
			
		||||
{
 | 
			
		||||
	struct netlink_sock *nlk = nlk_sk(sk);
 | 
			
		||||
	unsigned int rmem, rcvbuf;
 | 
			
		||||
 | 
			
		||||
	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
 | 
			
		||||
	rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
 | 
			
		||||
	rcvbuf = READ_ONCE(sk->sk_rcvbuf);
 | 
			
		||||
 | 
			
		||||
	if ((rmem == skb->truesize || rmem <= rcvbuf) &&
 | 
			
		||||
	    !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
 | 
			
		||||
		netlink_skb_set_owner_r(skb, sk);
 | 
			
		||||
		__netlink_sendskb(sk, skb);
 | 
			
		||||
		return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
 | 
			
		||||
		return rmem > (rcvbuf >> 1);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 | 
			
		||||
	return -1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -2249,6 +2262,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 | 
			
		|||
	struct module *module;
 | 
			
		||||
	int err = -ENOBUFS;
 | 
			
		||||
	int alloc_min_size;
 | 
			
		||||
	unsigned int rmem;
 | 
			
		||||
	int alloc_size;
 | 
			
		||||
 | 
			
		||||
	if (!lock_taken)
 | 
			
		||||
| 
						 | 
				
			
			@ -2258,9 +2272,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 | 
			
		|||
		goto errout_skb;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 | 
			
		||||
		goto errout_skb;
 | 
			
		||||
 | 
			
		||||
	/* NLMSG_GOODSIZE is small to avoid high order allocations being
 | 
			
		||||
	 * required, but it makes sense to _attempt_ a 32KiB allocation
 | 
			
		||||
	 * to reduce number of system calls on dump operations, if user
 | 
			
		||||
| 
						 | 
				
			
			@ -2283,6 +2294,12 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 | 
			
		|||
	if (!skb)
 | 
			
		||||
		goto errout_skb;
 | 
			
		||||
 | 
			
		||||
	rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
 | 
			
		||||
	if (rmem >= READ_ONCE(sk->sk_rcvbuf)) {
 | 
			
		||||
		atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 | 
			
		||||
		goto errout_skb;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* Trim skb to allocated size. User is expected to provide buffer as
 | 
			
		||||
	 * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at
 | 
			
		||||
	 * netlink_recvmsg())). dump will pack as many smaller messages as
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue