forked from mirrors/linux
		
	syzbot is catching so many bugs triggered by commit 9ee332d99e
("sget(): handle failures of register_shrinker()"). That commit expected
that calling kill_sb() from deactivate_locked_super() without successful
fill_super() is safe, but the reality was different; some callers assign
attributes which are needed for kill_sb() after sget() succeeds.
For example, [1] is a report where sb->s_mode (which seems to be either
FMODE_READ | FMODE_EXCL | FMODE_WRITE or FMODE_READ | FMODE_EXCL) is not
assigned unless sget() succeeds. But it is not worth complicating sget()
so that the register_shrinker() failure path can safely call
kill_block_super() via kill_sb(). Making alloc_super() fail if memory
allocation for register_shrinker() failed is much simpler. Let's avoid
calling deactivate_locked_super() from sget_userns() by preallocating
memory for the shrinker and making register_shrinker() in sget_userns()
never fail.
[1] https://syzkaller.appspot.com/bug?id=588996a25a2587be2e3a54e8646728fb9cae44e7
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+5a170e19c963a2e0df79@syzkaller.appspotmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
		
	
			
		
			
				
	
	
		
			83 lines
		
	
	
	
		
			2.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			83 lines
		
	
	
	
		
			2.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* SPDX-License-Identifier: GPL-2.0 */
 | 
						|
#ifndef _LINUX_SHRINKER_H
 | 
						|
#define _LINUX_SHRINKER_H
 | 
						|
 | 
						|
/*
 | 
						|
 * This struct is used to pass information from page reclaim to the shrinkers.
 | 
						|
 * We consolidate the values for easier extension later.
 | 
						|
 *
 | 
						|
 * The 'gfpmask' refers to the allocation we are currently trying to
 | 
						|
 * fulfil.
 | 
						|
 */
 | 
						|
struct shrink_control {
 | 
						|
	gfp_t gfp_mask;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * How many objects scan_objects should scan and try to reclaim.
 | 
						|
	 * This is reset before every call, so it is safe for callees
 | 
						|
	 * to modify.
 | 
						|
	 */
 | 
						|
	unsigned long nr_to_scan;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * How many objects did scan_objects process?
 | 
						|
	 * This defaults to nr_to_scan before every call, but the callee
 | 
						|
	 * should track its actual progress.
 | 
						|
	 */
 | 
						|
	unsigned long nr_scanned;
 | 
						|
 | 
						|
	/* current node being shrunk (for NUMA aware shrinkers) */
 | 
						|
	int nid;
 | 
						|
 | 
						|
	/* current memcg being shrunk (for memcg aware shrinkers) */
 | 
						|
	struct mem_cgroup *memcg;
 | 
						|
};
 | 
						|
 | 
						|
/*
 * Value returned by @scan_objects when progress cannot be made due to
 * potential deadlocks. All bits of an unsigned long are set.
 */
#define SHRINK_STOP (~0UL)
 | 
						|
/*
 | 
						|
 * A callback you can register to apply pressure to ageable caches.
 | 
						|
 *
 | 
						|
 * @count_objects should return the number of freeable items in the cache. If
 | 
						|
 * there are no objects to free or the number of freeable items cannot be
 | 
						|
 * determined, it should return 0. No deadlock checks should be done during the
 | 
						|
 * count callback - the shrinker relies on aggregating scan counts that couldn't
 | 
						|
 * be executed due to potential deadlocks to be run at a later call when the
 | 
						|
 * deadlock condition is no longer pending.
 | 
						|
 *
 | 
						|
 * @scan_objects will only be called if @count_objects returned a non-zero
 | 
						|
 * value for the number of freeable objects. The callout should scan the cache
 | 
						|
 * and attempt to free items from the cache. It should then return the number
 | 
						|
 * of objects freed during the scan, or SHRINK_STOP if progress cannot be made
 | 
						|
 * due to potential deadlocks. If SHRINK_STOP is returned, then no further
 | 
						|
 * attempts to call the @scan_objects will be made from the current reclaim
 | 
						|
 * context.
 | 
						|
 *
 | 
						|
 * @flags determines the shrinker's abilities, such as NUMA awareness
 | 
						|
 */
 | 
						|
struct shrinker {
 | 
						|
	unsigned long (*count_objects)(struct shrinker *,
 | 
						|
				       struct shrink_control *sc);
 | 
						|
	unsigned long (*scan_objects)(struct shrinker *,
 | 
						|
				      struct shrink_control *sc);
 | 
						|
 | 
						|
	int seeks;	/* seeks to recreate an obj */
 | 
						|
	long batch;	/* reclaim batch size, 0 = default */
 | 
						|
	unsigned long flags;
 | 
						|
 | 
						|
	/* These are for internal use */
 | 
						|
	struct list_head list;
 | 
						|
	/* objs pending delete, per node */
 | 
						|
	atomic_long_t *nr_deferred;
 | 
						|
};
 | 
						|
/* NOTE(review): presumably a default for the 'seeks' member of struct shrinker -- confirm. */
#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
 | 
						|
 | 
						|
/* Flags: single-bit values describing shrinker abilities */
#define SHRINKER_NUMA_AWARE	(1 << 0)
#define SHRINKER_MEMCG_AWARE	(1 << 1)
 | 
						|
 | 
						|
/*
 * Shrinker registration interface. Only the declarations are given here;
 * the implementations are not part of this header.
 *
 * prealloc_shrinker() and register_shrinker() return int, whereas
 * register_shrinker_prepared() returns void and so cannot report failure.
 *
 * NOTE(review): presumably prealloc_shrinker() performs the memory
 * allocation up front so that register_shrinker_prepared() cannot fail, and
 * free_prealloced_shrinker() releases that memory when registration never
 * happens -- confirm against the implementation.
 */
extern int prealloc_shrinker(struct shrinker *shrinker);
extern void register_shrinker_prepared(struct shrinker *shrinker);
extern int register_shrinker(struct shrinker *shrinker);
extern void unregister_shrinker(struct shrinker *shrinker);
extern void free_prealloced_shrinker(struct shrinker *shrinker);
 | 
						|
#endif
 |