mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	net: lockless sock_i_ino()
Followup of commitc51da3f7a1("net: remove sock_i_uid()") A recent syzbot report was the trigger for this change. Over the years, we had many problems caused by the read_lock[_bh](&sk->sk_callback_lock) in sock_i_uid(). We could fix smc_diag_dump_proto() or make a more radical move: Instead of waiting for new syzbot reports, cache the socket inode number in sk->sk_ino, so that we no longer need to acquire sk->sk_callback_lock in sock_i_ino(). This makes socket dumps faster (one less cache line miss, and two atomic ops avoided). Prior art: commit25a9c8a443("netlink: Add __sock_i_ino() for __netlink_diag_dump().") commit4f9bf2a2f5("tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH.") commitefc3dbc374("rds: Make rds_sock_lock BH rather than IRQ safe.") Fixes:d2d6422f8b("x86: Allow to enable PREEMPT_RT.") Reported-by: syzbot+50603c05bbdf4dfdaffa@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68b73804.050a0220.3db4df.01d8.GAE@google.com/T/#u Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com> Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Link: https://patch.msgid.link/20250902183603.740428-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
		
							parent
							
								
									b4ada0618e
								
							
						
					
					
						commit
						5d6b58c932
					
				
					 4 changed files with 14 additions and 28 deletions
				
			
		| 
						 | 
				
			
			@ -285,6 +285,7 @@ struct sk_filter;
 | 
			
		|||
  *	@sk_ack_backlog: current listen backlog
 | 
			
		||||
  *	@sk_max_ack_backlog: listen backlog set in listen()
 | 
			
		||||
  *	@sk_uid: user id of owner
 | 
			
		||||
  *	@sk_ino: inode number (zero if orphaned)
 | 
			
		||||
  *	@sk_prefer_busy_poll: prefer busypolling over softirq processing
 | 
			
		||||
  *	@sk_busy_poll_budget: napi processing budget when busypolling
 | 
			
		||||
  *	@sk_priority: %SO_PRIORITY setting
 | 
			
		||||
| 
						 | 
				
			
			@ -518,6 +519,7 @@ struct sock {
 | 
			
		|||
	u32			sk_ack_backlog;
 | 
			
		||||
	u32			sk_max_ack_backlog;
 | 
			
		||||
	kuid_t			sk_uid;
 | 
			
		||||
	unsigned long		sk_ino;
 | 
			
		||||
	spinlock_t		sk_peer_lock;
 | 
			
		||||
	int			sk_bind_phc;
 | 
			
		||||
	struct pid		*sk_peer_pid;
 | 
			
		||||
| 
						 | 
				
			
			@ -2056,6 +2058,10 @@ static inline int sk_rx_queue_get(const struct sock *sk)
 | 
			
		|||
static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 | 
			
		||||
{
 | 
			
		||||
	sk->sk_socket = sock;
 | 
			
		||||
	if (sock) {
 | 
			
		||||
		WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid);
 | 
			
		||||
		WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline wait_queue_head_t *sk_sleep(struct sock *sk)
 | 
			
		||||
| 
						 | 
				
			
			@ -2077,6 +2083,7 @@ static inline void sock_orphan(struct sock *sk)
 | 
			
		|||
	sk_set_socket(sk, NULL);
 | 
			
		||||
	sk->sk_wq  = NULL;
 | 
			
		||||
	/* Note: sk_uid is unchanged. */
 | 
			
		||||
	WRITE_ONCE(sk->sk_ino, 0);
 | 
			
		||||
	write_unlock_bh(&sk->sk_callback_lock);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -2087,20 +2094,22 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 | 
			
		|||
	rcu_assign_pointer(sk->sk_wq, &parent->wq);
 | 
			
		||||
	parent->sk = sk;
 | 
			
		||||
	sk_set_socket(sk, parent);
 | 
			
		||||
	WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid);
 | 
			
		||||
	security_sock_graft(sk, parent);
 | 
			
		||||
	write_unlock_bh(&sk->sk_callback_lock);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline unsigned long sock_i_ino(const struct sock *sk)
 | 
			
		||||
{
 | 
			
		||||
	/* Paired with WRITE_ONCE() in sock_graft() and sock_orphan() */
 | 
			
		||||
	return READ_ONCE(sk->sk_ino);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline kuid_t sk_uid(const struct sock *sk)
 | 
			
		||||
{
 | 
			
		||||
	/* Paired with WRITE_ONCE() in sockfs_setattr() */
 | 
			
		||||
	return READ_ONCE(sk->sk_uid);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
unsigned long __sock_i_ino(struct sock *sk);
 | 
			
		||||
unsigned long sock_i_ino(struct sock *sk);
 | 
			
		||||
 | 
			
		||||
static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
 | 
			
		||||
{
 | 
			
		||||
	return sk ? sk_uid(sk) : make_kuid(net->user_ns, 0);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2780,28 +2780,6 @@ void sock_pfree(struct sk_buff *skb)
 | 
			
		|||
EXPORT_SYMBOL(sock_pfree);
 | 
			
		||||
#endif /* CONFIG_INET */
 | 
			
		||||
 | 
			
		||||
unsigned long __sock_i_ino(struct sock *sk)
 | 
			
		||||
{
 | 
			
		||||
	unsigned long ino;
 | 
			
		||||
 | 
			
		||||
	read_lock(&sk->sk_callback_lock);
 | 
			
		||||
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
 | 
			
		||||
	read_unlock(&sk->sk_callback_lock);
 | 
			
		||||
	return ino;
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL(__sock_i_ino);
 | 
			
		||||
 | 
			
		||||
unsigned long sock_i_ino(struct sock *sk)
 | 
			
		||||
{
 | 
			
		||||
	unsigned long ino;
 | 
			
		||||
 | 
			
		||||
	local_bh_disable();
 | 
			
		||||
	ino = __sock_i_ino(sk);
 | 
			
		||||
	local_bh_enable();
 | 
			
		||||
	return ino;
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL(sock_i_ino);
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Allocate a skb from the socket's send buffer.
 | 
			
		||||
 */
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3554,7 +3554,6 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent)
 | 
			
		|||
	write_lock_bh(&sk->sk_callback_lock);
 | 
			
		||||
	rcu_assign_pointer(sk->sk_wq, &parent->wq);
 | 
			
		||||
	sk_set_socket(sk, parent);
 | 
			
		||||
	WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid);
 | 
			
		||||
	write_unlock_bh(&sk->sk_callback_lock);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -168,7 +168,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 | 
			
		|||
				 NETLINK_CB(cb->skb).portid,
 | 
			
		||||
				 cb->nlh->nlmsg_seq,
 | 
			
		||||
				 NLM_F_MULTI,
 | 
			
		||||
				 __sock_i_ino(sk)) < 0) {
 | 
			
		||||
				 sock_i_ino(sk)) < 0) {
 | 
			
		||||
			ret = 1;
 | 
			
		||||
			break;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue