mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	I made every global per-network-namespace instead.  But perhaps doing
that to this slab was a step too far.
The kmem_cache_create call in our net init method also seems to be
responsible for this lockdep warning:
[   45.163710] Unable to find swap-space signature
[   45.375718] trinity-c1 (855): attempted to duplicate a private mapping with mremap.  This is not supported.
[   46.055744] futex_wake_op: trinity-c1 tries to shift op by -209; fix this program
[   51.011723]
[   51.013378] ======================================================
[   51.013875] WARNING: possible circular locking dependency detected
[   51.014378] 5.2.0-rc2 #1 Not tainted
[   51.014672] ------------------------------------------------------
[   51.015182] trinity-c2/886 is trying to acquire lock:
[   51.015593] 000000005405f099 (slab_mutex){+.+.}, at: slab_attr_store+0xa2/0x130
[   51.016190]
[   51.016190] but task is already holding lock:
[   51.016652] 00000000ac662005 (kn->count#43){++++}, at: kernfs_fop_write+0x286/0x500
[   51.017266]
[   51.017266] which lock already depends on the new lock.
[   51.017266]
[   51.017909]
[   51.017909] the existing dependency chain (in reverse order) is:
[   51.018497]
[   51.018497] -> #1 (kn->count#43){++++}:
[   51.018956]        __lock_acquire+0x7cf/0x1a20
[   51.019317]        lock_acquire+0x17d/0x390
[   51.019658]        __kernfs_remove+0x892/0xae0
[   51.020020]        kernfs_remove_by_name_ns+0x78/0x110
[   51.020435]        sysfs_remove_link+0x55/0xb0
[   51.020832]        sysfs_slab_add+0xc1/0x3e0
[   51.021332]        __kmem_cache_create+0x155/0x200
[   51.021720]        create_cache+0xf5/0x320
[   51.022054]        kmem_cache_create_usercopy+0x179/0x320
[   51.022486]        kmem_cache_create+0x1a/0x30
[   51.022867]        nfsd_reply_cache_init+0x278/0x560
[   51.023266]        nfsd_init_net+0x20f/0x5e0
[   51.023623]        ops_init+0xcb/0x4b0
[   51.023928]        setup_net+0x2fe/0x670
[   51.024315]        copy_net_ns+0x30a/0x3f0
[   51.024653]        create_new_namespaces+0x3c5/0x820
[   51.025257]        unshare_nsproxy_namespaces+0xd1/0x240
[   51.025881]        ksys_unshare+0x506/0x9c0
[   51.026381]        __x64_sys_unshare+0x3a/0x50
[   51.026937]        do_syscall_64+0x110/0x10b0
[   51.027509]        entry_SYSCALL_64_after_hwframe+0x49/0xbe
[   51.028175]
[   51.028175] -> #0 (slab_mutex){+.+.}:
[   51.028817]        validate_chain+0x1c51/0x2cc0
[   51.029422]        __lock_acquire+0x7cf/0x1a20
[   51.029947]        lock_acquire+0x17d/0x390
[   51.030438]        __mutex_lock+0x100/0xfa0
[   51.030995]        mutex_lock_nested+0x27/0x30
[   51.031516]        slab_attr_store+0xa2/0x130
[   51.032020]        sysfs_kf_write+0x11d/0x180
[   51.032529]        kernfs_fop_write+0x32a/0x500
[   51.033056]        do_loop_readv_writev+0x21d/0x310
[   51.033627]        do_iter_write+0x2e5/0x380
[   51.034148]        vfs_writev+0x170/0x310
[   51.034616]        do_pwritev+0x13e/0x160
[   51.035100]        __x64_sys_pwritev+0xa3/0x110
[   51.035633]        do_syscall_64+0x110/0x10b0
[   51.036200]        entry_SYSCALL_64_after_hwframe+0x49/0xbe
[   51.036924]
[   51.036924] other info that might help us debug this:
[   51.036924]
[   51.037876]  Possible unsafe locking scenario:
[   51.037876]
[   51.038556]        CPU0                    CPU1
[   51.039130]        ----                    ----
[   51.039676]   lock(kn->count#43);
[   51.040084]                                lock(slab_mutex);
[   51.040597]                                lock(kn->count#43);
[   51.041062]   lock(slab_mutex);
[   51.041320]
[   51.041320]  *** DEADLOCK ***
[   51.041320]
[   51.041793] 3 locks held by trinity-c2/886:
[   51.042128]  #0: 000000001f55e152 (sb_writers#5){.+.+}, at: vfs_writev+0x2b9/0x310
[   51.042739]  #1: 00000000c7d6c034 (&of->mutex){+.+.}, at: kernfs_fop_write+0x25b/0x500
[   51.043400]  #2: 00000000ac662005 (kn->count#43){++++}, at: kernfs_fop_write+0x286/0x500
Reported-by: kernel test robot <lkp@intel.com>
Fixes: 3ba75830ce "drc containerization"
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
		
	
			
		
			
				
	
	
		
			89 lines
		
	
	
	
		
			2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			89 lines
		
	
	
	
		
			2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Request reply cache. This was heavily inspired by the
 * implementation in 4.3BSD/4.4BSD.
 *
 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
 */
 | 
						|
 | 
						|
#ifndef NFSCACHE_H
 | 
						|
#define NFSCACHE_H
 | 
						|
 | 
						|
#include <linux/sunrpc/svc.h>
 | 
						|
#include "netns.h"
 | 
						|
 | 
						|
/*
 | 
						|
 * Representation of a reply cache entry.
 | 
						|
 *
 | 
						|
 * Note that we use a sockaddr_in6 to hold the address instead of the more
 | 
						|
 * typical sockaddr_storage. This is for space reasons, since sockaddr_storage
 | 
						|
 * is much larger than a sockaddr_in6.
 | 
						|
 */
 | 
						|
struct svc_cacherep {
 | 
						|
	struct {
 | 
						|
		/* Keep often-read xid, csum in the same cache line: */
 | 
						|
		__be32			k_xid;
 | 
						|
		__wsum			k_csum;
 | 
						|
		u32			k_proc;
 | 
						|
		u32			k_prot;
 | 
						|
		u32			k_vers;
 | 
						|
		unsigned int		k_len;
 | 
						|
		struct sockaddr_in6	k_addr;
 | 
						|
	} c_key;
 | 
						|
 | 
						|
	struct rb_node		c_node;
 | 
						|
	struct list_head	c_lru;
 | 
						|
	unsigned char		c_state,	/* unused, inprog, done */
 | 
						|
				c_type,		/* status, buffer */
 | 
						|
				c_secure : 1;	/* req came from port < 1024 */
 | 
						|
	unsigned long		c_timestamp;
 | 
						|
	union {
 | 
						|
		struct kvec	u_vec;
 | 
						|
		__be32		u_status;
 | 
						|
	}			c_u;
 | 
						|
};
 | 
						|
 | 
						|
/* Shorthand accessors for the members of the c_u union in svc_cacherep. */
#define c_replvec		c_u.u_vec
#define c_replstat		c_u.u_status
 | 
						|
 | 
						|
/* cache entry states */
enum {
	RC_UNUSED,
	RC_INPROG,
	RC_DONE
};
 | 
						|
 | 
						|
/* return values */
enum {
	RC_DROPIT,
	RC_REPLY,
	RC_DOIT
};
 | 
						|
 | 
						|
/*
 * Cache types.
 * We may want to add more types one day, e.g. for diropres and
 * attrstat replies. Using cache entries with fixed length instead
 * of buffer pointers may be more efficient.
 */
enum {
	RC_NOCACHE,
	RC_REPLSTAT,
	RC_REPLBUFF,
};
 | 
						|
 | 
						|
/*
 * Cache entries expire after this time period.
 * HZ is defined outside this header; presumably this is 120 seconds
 * expressed in jiffies -- confirm against the users of RC_EXPIRE.
 */
#define RC_EXPIRE		(120 * HZ)

/* Checksum this amount of the request */
#define RC_CSUMLEN		(256U)
 | 
						|
 | 
						|
int	nfsd_drc_slab_create(void);
 | 
						|
void	nfsd_drc_slab_free(void);
 | 
						|
int	nfsd_reply_cache_init(struct nfsd_net *);
 | 
						|
void	nfsd_reply_cache_shutdown(struct nfsd_net *);
 | 
						|
int	nfsd_cache_lookup(struct svc_rqst *);
 | 
						|
void	nfsd_cache_update(struct svc_rqst *, int, __be32 *);
 | 
						|
int	nfsd_reply_cache_stats_open(struct inode *, struct file *);
 | 
						|
 | 
						|
#endif /* NFSCACHE_H */
 |