forked from mirrors/linux
		
	When something registers and unregisters many shrinkers, such as:
    for x in $(seq 10000); do unshare -Ui true; done
Sometimes the following error is printed to the kernel log:
    debugfs: Directory '...' with parent 'shrinker' already present!
This occurs since commit badc28d492 ("mm: shrinkers: fix deadlock in
shrinker debugfs") / v6.2: Since the call to `debugfs_remove_recursive`
was moved outside the `shrinker_rwsem`/`shrinker_mutex` lock, but the call
to `ida_free` stayed inside, a newly registered shrinker can be
re-assigned that ID and attempt to create the debugfs directory before the
directory from the previous shrinker has been removed.
The locking changes in commit f95bdb700b ("mm: vmscan: make global slab
shrink lockless") made the race condition more likely, though it existed
before then.
Commit badc28d492 ("mm: shrinkers: fix deadlock in shrinker debugfs")
could be reverted, since the issue it addressed should no longer occur
now that the count and scan operations are lockless as of commit 20cd1892fc
("mm: shrinkers: make count and scan in shrinker debugfs lockless").
However, since this is a contended lock, prefer instead moving `ida_free`
outside the lock to avoid the race.
Link: https://lkml.kernel.org/r/20230503013232.299211-1-joanbrugueram@gmail.com
Fixes: badc28d492 ("mm: shrinkers: fix deadlock in shrinker debugfs")
Signed-off-by: Joan Bruguera Micó <joanbrugueram@gmail.com>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
		
	
			
		
			
				
	
	
		
			137 lines
		
	
	
	
		
			4.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			137 lines
		
	
	
	
		
			4.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* SPDX-License-Identifier: GPL-2.0 */
 | 
						|
#ifndef _LINUX_SHRINKER_H
 | 
						|
#define _LINUX_SHRINKER_H
 | 
						|
 | 
						|
#include <linux/atomic.h>
 | 
						|
#include <linux/types.h>
 | 
						|
 | 
						|
/*
 | 
						|
 * This struct is used to pass information from page reclaim to the shrinkers.
 | 
						|
 * We consolidate the values for easier extension later.
 | 
						|
 *
 | 
						|
 * The 'gfpmask' refers to the allocation we are currently trying to
 | 
						|
 * fulfil.
 | 
						|
 */
 | 
						|
struct shrink_control {
 | 
						|
	gfp_t gfp_mask;
 | 
						|
 | 
						|
	/* current node being shrunk (for NUMA aware shrinkers) */
 | 
						|
	int nid;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * How many objects scan_objects should scan and try to reclaim.
 | 
						|
	 * This is reset before every call, so it is safe for callees
 | 
						|
	 * to modify.
 | 
						|
	 */
 | 
						|
	unsigned long nr_to_scan;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * How many objects did scan_objects process?
 | 
						|
	 * This defaults to nr_to_scan before every call, but the callee
 | 
						|
	 * should track its actual progress.
 | 
						|
	 */
 | 
						|
	unsigned long nr_scanned;
 | 
						|
 | 
						|
	/* current memcg being shrunk (for memcg aware shrinkers) */
 | 
						|
	struct mem_cgroup *memcg;
 | 
						|
};
 | 
						|
 | 
						|
/*
 * Sentinel return values for the shrinker callbacks:
 *
 * SHRINK_STOP  - returned by scan_objects when progress cannot be made
 *                due to potential deadlocks.
 * SHRINK_EMPTY - returned by count_objects when there are no objects
 *                to free.
 */
#define SHRINK_STOP (~0UL)
#define SHRINK_EMPTY (~0UL - 1)
/*
 | 
						|
 * A callback you can register to apply pressure to ageable caches.
 | 
						|
 *
 | 
						|
 * @count_objects should return the number of freeable items in the cache. If
 | 
						|
 * there are no objects to free, it should return SHRINK_EMPTY, while 0 is
 | 
						|
 * returned in cases of the number of freeable items cannot be determined
 | 
						|
 * or shrinker should skip this cache for this time (e.g., their number
 | 
						|
 * is below shrinkable limit). No deadlock checks should be done during the
 | 
						|
 * count callback - the shrinker relies on aggregating scan counts that couldn't
 | 
						|
 * be executed due to potential deadlocks to be run at a later call when the
 | 
						|
 * deadlock condition is no longer pending.
 | 
						|
 *
 | 
						|
 * @scan_objects will only be called if @count_objects returned a non-zero
 | 
						|
 * value for the number of freeable objects. The callout should scan the cache
 | 
						|
 * and attempt to free items from the cache. It should then return the number
 | 
						|
 * of objects freed during the scan, or SHRINK_STOP if progress cannot be made
 | 
						|
 * due to potential deadlocks. If SHRINK_STOP is returned, then no further
 | 
						|
 * attempts to call the @scan_objects will be made from the current reclaim
 | 
						|
 * context.
 | 
						|
 *
 | 
						|
 * @flags determine the shrinker abilities, like numa awareness
 | 
						|
 */
 | 
						|
struct shrinker {
 | 
						|
	unsigned long (*count_objects)(struct shrinker *,
 | 
						|
				       struct shrink_control *sc);
 | 
						|
	unsigned long (*scan_objects)(struct shrinker *,
 | 
						|
				      struct shrink_control *sc);
 | 
						|
 | 
						|
	long batch;	/* reclaim batch size, 0 = default */
 | 
						|
	int seeks;	/* seeks to recreate an obj */
 | 
						|
	unsigned flags;
 | 
						|
 | 
						|
	/* These are for internal use */
 | 
						|
	struct list_head list;
 | 
						|
#ifdef CONFIG_MEMCG
 | 
						|
	/* ID in shrinker_idr */
 | 
						|
	int id;
 | 
						|
#endif
 | 
						|
#ifdef CONFIG_SHRINKER_DEBUG
 | 
						|
	int debugfs_id;
 | 
						|
	const char *name;
 | 
						|
	struct dentry *debugfs_entry;
 | 
						|
#endif
 | 
						|
	/* objs pending delete, per node */
 | 
						|
	atomic_long_t *nr_deferred;
 | 
						|
};
 | 
						|
#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */

/* Flags: bit values stored in shrinker->flags */
#define SHRINKER_REGISTERED	(1 << 0)
#define SHRINKER_NUMA_AWARE	(1 << 1)
#define SHRINKER_MEMCG_AWARE	(1 << 2)
/*
 * It just makes sense when the shrinker is also MEMCG_AWARE for now,
 * non-MEMCG_AWARE shrinker should not have this flag set.
 */
#define SHRINKER_NONSLAB	(1 << 3)
/*
 * Shrinker registration interface.
 *
 * prealloc_shrinker() and register_shrinker() take a printf-style format
 * string plus arguments (checked via __printf(2, 3)) and return an int.
 * NOTE(review): presumably the formatted string becomes the shrinker's
 * name and the return value is 0 or a negative errno - confirm against
 * the implementation.
 */
extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker,
					    const char *fmt, ...);
extern void register_shrinker_prepared(struct shrinker *shrinker);
extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker,
					    const char *fmt, ...);
extern void unregister_shrinker(struct shrinker *shrinker);
extern void free_prealloced_shrinker(struct shrinker *shrinker);
extern void synchronize_shrinkers(void);
#ifdef CONFIG_SHRINKER_DEBUG
/*
 * Shrinker debugfs interface, available with CONFIG_SHRINKER_DEBUG.
 *
 * shrinker_debugfs_detach() returns a dentry and reports an ID through
 * @debugfs_id; shrinker_debugfs_remove() takes that same pair.
 * NOTE(review): presumably detach is called with the shrinker lock held
 * and remove (directory removal plus ID release) afterwards without it,
 * so that an ID cannot be reused while its directory still exists -
 * confirm against the implementation.
 */
extern int shrinker_debugfs_add(struct shrinker *shrinker);
extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
					      int *debugfs_id);
extern void shrinker_debugfs_remove(struct dentry *debugfs_entry,
				    int debugfs_id);
extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker,
						  const char *fmt, ...);
#else /* CONFIG_SHRINKER_DEBUG */
/*
 * Stub used when CONFIG_SHRINKER_DEBUG is not set: does nothing with
 * @shrinker and always returns 0.
 */
static inline int shrinker_debugfs_add(struct shrinker *shrinker)
{
	return 0;
}
/*
 * Stub used when CONFIG_SHRINKER_DEBUG is not set: there is no debugfs
 * entry to hand back, so store -1 in *@debugfs_id and return NULL.
 * @debugfs_id must point to a valid int; @shrinker is not used.
 */
static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
						     int *debugfs_id)
{
	*debugfs_id = -1;
	return NULL;
}
/*
 * Stub used when CONFIG_SHRINKER_DEBUG is not set: both arguments are
 * ignored and nothing is done.
 */
static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry,
					   int debugfs_id)
{
}
/*
 * Stub used when CONFIG_SHRINKER_DEBUG is not set: the arguments are
 * ignored and 0 is always returned. The __printf(2, 3) annotation keeps
 * the format string and its arguments checked by the compiler.
 */
static inline __printf(2, 3)
int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
{
	return 0;
}
#endif /* CONFIG_SHRINKER_DEBUG */
 | 
						|
#endif /* _LINUX_SHRINKER_H */
 |