mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	Netlink supports iterative dumping of data. It provides families with
the following ops:
 - start - (optional) kicks off the dumping process
 - dump  - actual dump helper, keeps getting called until it returns 0
 - done  - (optional) pairs with .start, can be used for cleanup
The whole process is asynchronous and the repeated calls to .dump
don't actually happen in a tight loop, but rather are triggered
in response to recvmsg() on the socket.
This gives the user full control over the dump, but also means that
the user can close the socket without getting to the end of the dump.
To make sure .start is always paired with .done we check if there
is an ongoing dump before freeing the socket, and if so call .done.
The complication is that sockets can get freed from BH and .done
is allowed to sleep. So we use a workqueue to defer the call, when
needed.
Unfortunately this does not work correctly. What we defer is not
the cleanup but rather releasing a reference on the socket.
We have no guarantee that we own the last reference; if someone
else holds the socket, they may release it in BH and we're back
to square one.
The whole dance, however, appears to be unnecessary. Only the user
can interact with dumps, so we can clean up when the socket is closed.
And close always happens in process context. Some async code may
still access the socket after close, queue notification skbs to it, etc.,
but no dumps can start, end or otherwise make progress.
Delete the workqueue and flush the dump state directly from the release
handler. Note that further cleanup is possible in -next, for instance
we now always call .done before releasing the main module reference,
so dump doesn't have to take a reference of its own.
Reported-by: syzkaller <syzkaller@googlegroups.com>
Fixes: ed5d7788a9 ("netlink: Do not schedule work from sk_destruct")
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241106015235.2458807-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
		
	
			
		
			
				
	
	
		
			79 lines
		
	
	
	
		
			1.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			79 lines
		
	
	
	
		
			1.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* SPDX-License-Identifier: GPL-2.0 */
 | 
						|
#ifndef _AF_NETLINK_H
 | 
						|
#define _AF_NETLINK_H
 | 
						|
 | 
						|
#include <linux/rhashtable.h>
 | 
						|
#include <linux/atomic.h>
 | 
						|
#include <net/sock.h>
 | 
						|
 | 
						|
/* flags */
 | 
						|
/*
 * Bit numbers (not masks) within netlink_sock.flags.  They are consumed
 * through nlk_test_bit(), which pastes the suffix after NETLINK_F_ and
 * hands the resulting bit index to test_bit().
 */
enum {
	NETLINK_F_KERNEL_SOCKET,
	NETLINK_F_RECV_PKTINFO,
	NETLINK_F_BROADCAST_SEND_ERROR,
	NETLINK_F_RECV_NO_ENOBUFS,
	NETLINK_F_LISTEN_ALL_NSID,
	NETLINK_F_CAP_ACK,
	NETLINK_F_EXT_ACK,
	NETLINK_F_STRICT_CHK,
};
 | 
						|
 | 
						|
/*
 * NLGRPSZ(x):    size in bytes of a bitmap holding x bits, with the bit
 *                count first rounded up to a whole number of unsigned longs.
 * NLGRPLONGS(x): the same size expressed as a count of unsigned longs.
 */
#define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x)	(NLGRPSZ(x)/sizeof(unsigned long))
 | 
						|
 | 
						|
struct netlink_sock {
 | 
						|
	/* struct sock has to be the first member of netlink_sock */
 | 
						|
	struct sock		sk;
 | 
						|
	unsigned long		flags;
 | 
						|
	u32			portid;
 | 
						|
	u32			dst_portid;
 | 
						|
	u32			dst_group;
 | 
						|
	u32			subscriptions;
 | 
						|
	u32			ngroups;
 | 
						|
	unsigned long		*groups;
 | 
						|
	unsigned long		state;
 | 
						|
	size_t			max_recvmsg_len;
 | 
						|
	wait_queue_head_t	wait;
 | 
						|
	bool			bound;
 | 
						|
	bool			cb_running;
 | 
						|
	int			dump_done_errno;
 | 
						|
	struct netlink_callback	cb;
 | 
						|
	struct mutex		nl_cb_mutex;
 | 
						|
 | 
						|
	void			(*netlink_rcv)(struct sk_buff *skb);
 | 
						|
	int			(*netlink_bind)(struct net *net, int group);
 | 
						|
	void			(*netlink_unbind)(struct net *net, int group);
 | 
						|
	void			(*netlink_release)(struct sock *sk,
 | 
						|
						   unsigned long *groups);
 | 
						|
	struct module		*module;
 | 
						|
 | 
						|
	struct rhash_head	node;
 | 
						|
	struct rcu_head		rcu;
 | 
						|
};
 | 
						|
 | 
						|
static inline struct netlink_sock *nlk_sk(struct sock *sk)
 | 
						|
{
 | 
						|
	return container_of(sk, struct netlink_sock, sk);
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Test one NETLINK_F_* flag on a socket: @nr is the flag name without the
 * NETLINK_F_ prefix, @sk is the struct sock embedded in a netlink_sock.
 */
#define nlk_test_bit(nr, sk) \
	test_bit(NETLINK_F_##nr, &nlk_sk(sk)->flags)
 | 
						|
 | 
						|
struct netlink_table {
 | 
						|
	struct rhashtable	hash;
 | 
						|
	struct hlist_head	mc_list;
 | 
						|
	struct listeners __rcu	*listeners;
 | 
						|
	unsigned int		flags;
 | 
						|
	unsigned int		groups;
 | 
						|
	struct mutex		*cb_mutex;
 | 
						|
	struct module		*module;
 | 
						|
	int			(*bind)(struct net *net, int group);
 | 
						|
	void			(*unbind)(struct net *net, int group);
 | 
						|
	void                    (*release)(struct sock *sk,
 | 
						|
					   unsigned long *groups);
 | 
						|
	int			registered;
 | 
						|
};
 | 
						|
 | 
						|
extern struct netlink_table *nl_table;
 | 
						|
extern rwlock_t nl_table_lock;
 | 
						|
 | 
						|
#endif
 |