mm: multi-gen LRU: per-node lru_gen_folio lists

For each node, memcgs are divided into two generations: the old and the
young. For each generation, memcgs are randomly sharded into multiple
bins to improve scalability. For each bin, an RCU hlist_nulls is
virtually divided into three segments: the head, the tail and the
default.

An onlining memcg is added to the tail of a random bin in the old
generation. The eviction starts at the head of a random bin in the old
generation. The per-node memcg generation counter, whose remainder
(mod 2) indexes the old generation, is incremented when all its bins
become empty.

There are four operations:
1. MEMCG_LRU_HEAD, which moves a memcg to the head of a random bin in
   its current generation (old or young) and updates its "seg" to
   "head";
2. MEMCG_LRU_TAIL, which moves a memcg to the tail of a random bin in
   its current generation (old or young) and updates its "seg" to
   "tail";
3. MEMCG_LRU_OLD, which moves a memcg to the head of a random bin in
   the old generation, updates its "gen" to "old" and resets its "seg"
   to "default";
4. MEMCG_LRU_YOUNG, which moves a memcg to the tail of a random bin in
   the young generation, updates its "gen" to "young" and resets its
   "seg" to "default".

The events that trigger the above operations are:
1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD;
2. The first attempt to reclaim a memcg below low, which triggers
   MEMCG_LRU_TAIL;
3. The first attempt to reclaim a memcg below the reclaimable size
   threshold, which triggers MEMCG_LRU_TAIL;
4. The second attempt to reclaim a memcg below the reclaimable size
   threshold, which triggers MEMCG_LRU_YOUNG;
5. Attempting to reclaim a memcg below min, which triggers
   MEMCG_LRU_YOUNG;
6. Finishing the aging on the eviction path, which triggers
   MEMCG_LRU_YOUNG;
7. Offlining a memcg, which triggers MEMCG_LRU_OLD.

Note that the memcg LRU only applies to global reclaim; the round-robin
incrementing of their max_seq counters ensures eventual fairness to all
eligible memcgs. Memcg reclaim still relies on mem_cgroup_iter().

Link: https://lkml.kernel.org/r/20221222041905.2431096-7-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michael Larabel <Michael@MichaelLarabel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
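As a rough sketch of the sharding scheme described above (illustrative
only, not part of the patch): the remainder of the per-node counter
selects the old generation, and a walk starts at a random bin and wraps
through the rest of that generation's bins. The walk_one_bin() helper
here is hypothetical; the real walk is shrink_many() in mm/vmscan.c
below.

	/* sketch: eviction's starting point, assuming MEMCG_NR_GENS == 2
	 * and MEMCG_NR_BINS == 8 as defined by this patch */
	int gen = pgdat->memcg_lru.seq % MEMCG_NR_GENS;	/* the old generation */
	int first_bin = get_random_u32_below(MEMCG_NR_BINS);
	int bin = first_bin;

	do {
		walk_one_bin(&pgdat->memcg_lru.fifo[gen][bin]);	/* hypothetical helper */
		bin = (bin + 1) % MEMCG_NR_BINS;	/* wrap within the generation */
	} while (bin != first_bin);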
This commit is contained in: commit e4dde56cd2 (parent 77d4459a4a)

6 changed files with 500 additions and 35 deletions
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -794,6 +794,11 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg)
 	percpu_ref_put(&objcg->refcnt);
 }
 
+static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg)
+{
+	return !memcg || css_tryget(&memcg->css);
+}
+
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 	if (memcg)
@@ -1301,6 +1306,11 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg)
 {
 }
 
+static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg)
+{
+	return true;
+}
+
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 }
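The non-stub mem_cgroup_tryget() above exists so a walker can pin a
memcg found under RCU before it sleeps; a condensed sketch of the
pattern that shrink_many() in mm/vmscan.c uses later in this diff:

	rcu_read_lock();
	memcg = lruvec_memcg(lruvec);
	if (!mem_cgroup_tryget(memcg))	/* css is being freed; skip it */
		memcg = NULL;
	rcu_read_unlock();

	if (memcg) {
		/* refcount held: reclaim may now sleep safely */
		mem_cgroup_put(memcg);
	}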
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -122,6 +122,18 @@ static inline bool lru_gen_in_fault(void)
 	return current->in_lru_fault;
 }
 
+#ifdef CONFIG_MEMCG
+static inline int lru_gen_memcg_seg(struct lruvec *lruvec)
+{
+	return READ_ONCE(lruvec->lrugen.seg);
+}
+#else
+static inline int lru_gen_memcg_seg(struct lruvec *lruvec)
+{
+	return 0;
+}
+#endif
+
 static inline int lru_gen_from_seq(unsigned long seq)
 {
 	return seq % MAX_NR_GENS;
@@ -297,6 +309,11 @@ static inline bool lru_gen_in_fault(void)
 	return false;
 }
 
+static inline int lru_gen_memcg_seg(struct lruvec *lruvec)
+{
+	return 0;
+}
+
 static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
 {
 	return false;
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -7,6 +7,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/list.h>
+#include <linux/list_nulls.h>
 #include <linux/wait.h>
 #include <linux/bitops.h>
 #include <linux/cache.h>
@@ -367,6 +368,15 @@ struct page_vma_mapped_walk;
 #define LRU_GEN_MASK		((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
 #define LRU_REFS_MASK		((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
 
+/* see the comment on MEMCG_NR_GENS */
+enum {
+	MEMCG_LRU_NOP,
+	MEMCG_LRU_HEAD,
+	MEMCG_LRU_TAIL,
+	MEMCG_LRU_OLD,
+	MEMCG_LRU_YOUNG,
+};
+
 #ifdef CONFIG_LRU_GEN
 
 enum {
@@ -426,6 +436,14 @@ struct lru_gen_folio {
 	atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
 	/* whether the multi-gen LRU is enabled */
 	bool enabled;
+#ifdef CONFIG_MEMCG
+	/* the memcg generation this lru_gen_folio belongs to */
+	u8 gen;
+	/* the list segment this lru_gen_folio belongs to */
+	u8 seg;
+	/* per-node lru_gen_folio list for global reclaim */
+	struct hlist_nulls_node list;
+#endif
 };
 
 enum {
@@ -479,12 +497,87 @@ void lru_gen_init_lruvec(struct lruvec *lruvec);
 void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
 
 #ifdef CONFIG_MEMCG
+
+/*
+ * For each node, memcgs are divided into two generations: the old and the
+ * young. For each generation, memcgs are randomly sharded into multiple bins
+ * to improve scalability. For each bin, the hlist_nulls is virtually divided
+ * into three segments: the head, the tail and the default.
+ *
+ * An onlining memcg is added to the tail of a random bin in the old generation.
+ * The eviction starts at the head of a random bin in the old generation. The
+ * per-node memcg generation counter, whose remainder (mod MEMCG_NR_GENS)
+ * indexes the old generation, is incremented when all its bins become empty.
+ *
+ * There are four operations:
+ * 1. MEMCG_LRU_HEAD, which moves a memcg to the head of a random bin in its
+ *    current generation (old or young) and updates its "seg" to "head";
+ * 2. MEMCG_LRU_TAIL, which moves a memcg to the tail of a random bin in its
+ *    current generation (old or young) and updates its "seg" to "tail";
+ * 3. MEMCG_LRU_OLD, which moves a memcg to the head of a random bin in the old
+ *    generation, updates its "gen" to "old" and resets its "seg" to "default";
+ * 4. MEMCG_LRU_YOUNG, which moves a memcg to the tail of a random bin in the
+ *    young generation, updates its "gen" to "young" and resets its "seg" to
+ *    "default".
+ *
+ * The events that trigger the above operations are:
+ * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD;
+ * 2. The first attempt to reclaim a memcg below low, which triggers
+ *    MEMCG_LRU_TAIL;
+ * 3. The first attempt to reclaim a memcg below the reclaimable size threshold,
+ *    which triggers MEMCG_LRU_TAIL;
+ * 4. The second attempt to reclaim a memcg below the reclaimable size
+ *    threshold, which triggers MEMCG_LRU_YOUNG;
+ * 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG;
+ * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG;
+ * 7. Offlining a memcg, which triggers MEMCG_LRU_OLD.
+ *
+ * Note that memcg LRU only applies to global reclaim, and the round-robin
+ * incrementing of their max_seq counters ensures eventual fairness to all
+ * eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter().
+ */
+#define MEMCG_NR_GENS	2
+#define MEMCG_NR_BINS	8
+
+struct lru_gen_memcg {
+	/* the per-node memcg generation counter */
+	unsigned long seq;
+	/* each memcg has one lru_gen_folio per node */
+	unsigned long nr_memcgs[MEMCG_NR_GENS];
+	/* per-node lru_gen_folio list for global reclaim */
+	struct hlist_nulls_head	fifo[MEMCG_NR_GENS][MEMCG_NR_BINS];
+	/* protects the above */
+	spinlock_t lock;
+};
+
+void lru_gen_init_pgdat(struct pglist_data *pgdat);
+
 void lru_gen_init_memcg(struct mem_cgroup *memcg);
 void lru_gen_exit_memcg(struct mem_cgroup *memcg);
-#endif
+void lru_gen_online_memcg(struct mem_cgroup *memcg);
+void lru_gen_offline_memcg(struct mem_cgroup *memcg);
+void lru_gen_release_memcg(struct mem_cgroup *memcg);
+void lru_gen_rotate_memcg(struct lruvec *lruvec, int op);
+
+#else /* !CONFIG_MEMCG */
+
+#define MEMCG_NR_GENS	1
+
+struct lru_gen_memcg {
+};
+
+static inline void lru_gen_init_pgdat(struct pglist_data *pgdat)
+{
+}
+
+#endif /* CONFIG_MEMCG */
 
 #else /* !CONFIG_LRU_GEN */
 
+static inline void lru_gen_init_pgdat(struct pglist_data *pgdat)
+{
+}
+
 static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
 {
 }
@@ -494,6 +587,7 @@ static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 }
 
 #ifdef CONFIG_MEMCG
+
 static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
 {
 }
@@ -501,7 +595,24 @@ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
 static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg)
 {
 }
-#endif
+
+static inline void lru_gen_online_memcg(struct mem_cgroup *memcg)
+{
+}
+
+static inline void lru_gen_offline_memcg(struct mem_cgroup *memcg)
+{
+}
+
+static inline void lru_gen_release_memcg(struct mem_cgroup *memcg)
+{
+}
+
+static inline void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
+{
+}
+
+#endif /* CONFIG_MEMCG */
 
 #endif /* CONFIG_LRU_GEN */
 
@@ -1243,6 +1354,8 @@ typedef struct pglist_data {
 #ifdef CONFIG_LRU_GEN
 	/* kswap mm walk data */
 	struct lru_gen_mm_walk	mm_walk;
+	/* lru_gen_folio list */
+	struct lru_gen_memcg memcg_lru;
 #endif
 
 	CACHELINE_PADDING(_pad2_);
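Since the per-node lists link struct lru_gen_folio nodes rather than
lruvecs, a walker recovers the owning structures via container_of();
this is the same recovery that shrink_many() in mm/vmscan.c performs
below:

	/* from a list node back to its lruvec, memcg and node */
	struct lruvec *lruvec = container_of(lrugen, struct lruvec, lrugen);
	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
	struct pglist_data *pgdat = lruvec_pgdat(lruvec);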
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -478,6 +478,16 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid)
 	struct mem_cgroup_per_node *mz;
 	struct mem_cgroup_tree_per_node *mctz;
 
+	if (lru_gen_enabled()) {
+		struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec;
+
+		/* see the comment on MEMCG_NR_GENS */
+		if (soft_limit_excess(memcg) && lru_gen_memcg_seg(lruvec) != MEMCG_LRU_HEAD)
+			lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD);
+
+		return;
+	}
+
 	mctz = soft_limit_tree.rb_tree_per_node[nid];
 	if (!mctz)
 		return;
@@ -3530,6 +3540,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 	struct mem_cgroup_tree_per_node *mctz;
 	unsigned long excess;
 
+	if (lru_gen_enabled())
+		return 0;
+
 	if (order > 0)
 		return 0;
 
@@ -5391,6 +5404,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	if (unlikely(mem_cgroup_is_root(memcg)))
 		queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
 				   2UL*HZ);
+	lru_gen_online_memcg(memcg);
 	return 0;
 offline_kmem:
 	memcg_offline_kmem(memcg);
@@ -5422,6 +5436,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 	memcg_offline_kmem(memcg);
 	reparent_shrinker_deferred(memcg);
 	wb_memcg_offline(memcg);
+	lru_gen_offline_memcg(memcg);
 
 	drain_all_stock(memcg);
 
@@ -5433,6 +5448,7 @@ static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
 	invalidate_reclaim_iterators(memcg);
+	lru_gen_release_memcg(memcg);
 }
 
 static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7941,6 +7941,7 @@ static void __init free_area_init_node(int nid)
 	pgdat_set_deferred_range(pgdat);
 
 	free_area_init_core(pgdat);
+	lru_gen_init_pgdat(pgdat);
 }
 
 static void __init free_area_init_memoryless_node(int nid)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -55,6 +55,8 @@
 #include <linux/ctype.h>
 #include <linux/debugfs.h>
 #include <linux/khugepaged.h>
+#include <linux/rculist_nulls.h>
+#include <linux/random.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -135,11 +137,6 @@ struct scan_control {
 	/* Always discard instead of demoting to lower tier memory */
 	unsigned int no_demotion:1;
 
-#ifdef CONFIG_LRU_GEN
-	/* help kswapd make better choices among multiple memcgs */
-	unsigned long last_reclaimed;
-#endif
-
 	/* Allocation order */
 	s8 order;
 
@@ -3185,6 +3182,9 @@ DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS);
 		for ((type) = 0; (type) < ANON_AND_FILE; (type)++)	\
 			for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
 
+#define get_memcg_gen(seq)	((seq) % MEMCG_NR_GENS)
+#define get_memcg_bin(bin)	((bin) % MEMCG_NR_BINS)
+
 static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid)
 {
 	struct pglist_data *pgdat = NODE_DATA(nid);
@@ -4453,8 +4453,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
 		if (sc->priority <= DEF_PRIORITY - 2)
 			wait_event_killable(lruvec->mm_state.wait,
 					    max_seq < READ_ONCE(lrugen->max_seq));
-
-		return max_seq < READ_ONCE(lrugen->max_seq);
+		return false;
 	}
 
 	VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
@@ -4527,8 +4526,6 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
 
 	VM_WARN_ON_ONCE(!current_is_kswapd());
 
-	sc->last_reclaimed = sc->nr_reclaimed;
-
 	/* check the order to exclude compaction-induced reclaim */
 	if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
 		return;
@@ -5117,8 +5114,7 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
  * 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
  *    reclaim.
  */
-static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
-				    bool can_swap)
+static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool can_swap)
 {
 	unsigned long nr_to_scan;
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
@@ -5136,10 +5132,8 @@ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
 	if (sc->priority == DEF_PRIORITY)
 		return nr_to_scan;
 
-	try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false);
-
 	/* skip this lruvec as it's low on cold folios */
-	return 0;
+	return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0;
 }
 
 static unsigned long get_nr_to_reclaim(struct scan_control *sc)
@@ -5148,29 +5142,18 @@ static unsigned long get_nr_to_reclaim(struct scan_control *sc)
 	if (!global_reclaim(sc))
 		return -1;
 
-	/* discount the previous progress for kswapd */
-	if (current_is_kswapd())
-		return sc->nr_to_reclaim + sc->last_reclaimed;
-
 	return max(sc->nr_to_reclaim, compact_gap(sc->order));
 }
 
-static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 {
-	struct blk_plug plug;
+	long nr_to_scan;
 	unsigned long scanned = 0;
 	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
 
-	lru_add_drain();
-
-	blk_start_plug(&plug);
-
-	set_mm_walk(lruvec_pgdat(lruvec));
-
 	while (true) {
 		int delta;
 		int swappiness;
-		unsigned long nr_to_scan;
 
 		if (sc->may_swap)
 			swappiness = get_swappiness(lruvec, sc);
@@ -5180,7 +5163,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 			swappiness = 0;
 
 		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
-		if (!nr_to_scan)
+		if (nr_to_scan <= 0)
 			break;
 
 		delta = evict_folios(lruvec, sc, swappiness);
@@ -5197,11 +5180,252 @@
 		cond_resched();
 	}
 
+	/* whether try_to_inc_max_seq() was successful */
+	return nr_to_scan < 0;
+}
+
+static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
+{
+	bool success;
+	unsigned long scanned = sc->nr_scanned;
+	unsigned long reclaimed = sc->nr_reclaimed;
+	int seg = lru_gen_memcg_seg(lruvec);
+	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+
+	/* see the comment on MEMCG_NR_GENS */
+	if (!lruvec_is_sizable(lruvec, sc))
+		return seg != MEMCG_LRU_TAIL ? MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG;
+
+	mem_cgroup_calculate_protection(NULL, memcg);
+
+	if (mem_cgroup_below_min(NULL, memcg))
+		return MEMCG_LRU_YOUNG;
+
+	if (mem_cgroup_below_low(NULL, memcg)) {
+		/* see the comment on MEMCG_NR_GENS */
+		if (seg != MEMCG_LRU_TAIL)
+			return MEMCG_LRU_TAIL;
+
+		memcg_memory_event(memcg, MEMCG_LOW);
+	}
+
+	success = try_to_shrink_lruvec(lruvec, sc);
+
+	shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority);
+
+	if (!sc->proactive)
+		vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned,
+			   sc->nr_reclaimed - reclaimed);
+
+	sc->nr_reclaimed += current->reclaim_state->reclaimed_slab;
+	current->reclaim_state->reclaimed_slab = 0;
+
+	return success ? MEMCG_LRU_YOUNG : 0;
+}
+
+#ifdef CONFIG_MEMCG
+
+static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
+{
+	int gen;
+	int bin;
+	int first_bin;
+	struct lruvec *lruvec;
+	struct lru_gen_folio *lrugen;
+	const struct hlist_nulls_node *pos;
+	int op = 0;
+	struct mem_cgroup *memcg = NULL;
+	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
+
+	bin = first_bin = get_random_u32_below(MEMCG_NR_BINS);
+restart:
+	gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq));
+
+	rcu_read_lock();
+
+	hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) {
+		if (op)
+			lru_gen_rotate_memcg(lruvec, op);
+
+		mem_cgroup_put(memcg);
+
+		lruvec = container_of(lrugen, struct lruvec, lrugen);
+		memcg = lruvec_memcg(lruvec);
+
+		if (!mem_cgroup_tryget(memcg)) {
+			op = 0;
+			memcg = NULL;
+			continue;
+		}
+
+		rcu_read_unlock();
+
+		op = shrink_one(lruvec, sc);
+
+		if (sc->nr_reclaimed >= nr_to_reclaim)
+			goto success;
+
+		rcu_read_lock();
+	}
+
+	rcu_read_unlock();
+
+	/* restart if raced with lru_gen_rotate_memcg() */
+	if (gen != get_nulls_value(pos))
+		goto restart;
+
+	/* try the rest of the bins of the current generation */
+	bin = get_memcg_bin(bin + 1);
+	if (bin != first_bin)
+		goto restart;
+success:
+	if (op)
+		lru_gen_rotate_memcg(lruvec, op);
+
+	mem_cgroup_put(memcg);
+}
+
+static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+{
+	struct blk_plug plug;
+
+	VM_WARN_ON_ONCE(global_reclaim(sc));
+
+	lru_add_drain();
+
+	blk_start_plug(&plug);
+
+	set_mm_walk(lruvec_pgdat(lruvec));
+
+	if (try_to_shrink_lruvec(lruvec, sc))
+		lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
+
 	clear_mm_walk();
 
 	blk_finish_plug(&plug);
 }
 
+#else /* !CONFIG_MEMCG */
+
+static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
+{
+	BUILD_BUG();
+}
+
+static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+{
+	BUILD_BUG();
+}
+
+#endif
+
+static void set_initial_priority(struct pglist_data *pgdat, struct scan_control *sc)
+{
+	int priority;
+	unsigned long reclaimable;
+	struct lruvec *lruvec = mem_cgroup_lruvec(NULL, pgdat);
+
+	if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH)
+		return;
+	/*
+	 * Determine the initial priority based on ((total / MEMCG_NR_GENS) >>
+	 * priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, where the
+	 * estimated reclaimed_to_scanned_ratio = inactive / total.
+	 */
+	reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE);
+	if (get_swappiness(lruvec, sc))
+		reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON);
+
+	reclaimable /= MEMCG_NR_GENS;
+
+	/* round down reclaimable and round up sc->nr_to_reclaim */
+	priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1);
+
+	sc->priority = clamp(priority, 0, DEF_PRIORITY);
+}
+
+static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
+{
+	struct blk_plug plug;
+	unsigned long reclaimed = sc->nr_reclaimed;
+
+	VM_WARN_ON_ONCE(!global_reclaim(sc));
+
+	lru_add_drain();
+
+	blk_start_plug(&plug);
+
+	set_mm_walk(pgdat);
+
+	set_initial_priority(pgdat, sc);
+
+	if (current_is_kswapd())
+		sc->nr_reclaimed = 0;
+
+	if (mem_cgroup_disabled())
+		shrink_one(&pgdat->__lruvec, sc);
+	else
+		shrink_many(pgdat, sc);
+
+	if (current_is_kswapd())
+		sc->nr_reclaimed += reclaimed;
+
+	clear_mm_walk();
+
+	blk_finish_plug(&plug);
+
+	/* kswapd should never fail */
+	pgdat->kswapd_failures = 0;
+}
+
+#ifdef CONFIG_MEMCG
+void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
+{
+	int seg;
+	int old, new;
+	int bin = get_random_u32_below(MEMCG_NR_BINS);
+	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+
+	spin_lock(&pgdat->memcg_lru.lock);
+
+	VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
+
+	seg = 0;
+	new = old = lruvec->lrugen.gen;
+
+	/* see the comment on MEMCG_NR_GENS */
+	if (op == MEMCG_LRU_HEAD)
+		seg = MEMCG_LRU_HEAD;
+	else if (op == MEMCG_LRU_TAIL)
+		seg = MEMCG_LRU_TAIL;
+	else if (op == MEMCG_LRU_OLD)
+		new = get_memcg_gen(pgdat->memcg_lru.seq);
+	else if (op == MEMCG_LRU_YOUNG)
+		new = get_memcg_gen(pgdat->memcg_lru.seq + 1);
+	else
+		VM_WARN_ON_ONCE(true);
+
+	hlist_nulls_del_rcu(&lruvec->lrugen.list);
+
+	if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD)
+		hlist_nulls_add_head_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
+	else
+		hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
+
+	pgdat->memcg_lru.nr_memcgs[old]--;
+	pgdat->memcg_lru.nr_memcgs[new]++;
+
+	lruvec->lrugen.gen = new;
+	WRITE_ONCE(lruvec->lrugen.seg, seg);
+
+	if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq))
+		WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
+
+	spin_unlock(&pgdat->memcg_lru.lock);
+}
+#endif
+
 /******************************************************************************
  *                          state change
  ******************************************************************************/
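To make the set_initial_priority() arithmetic above concrete, a small
userspace sketch with made-up page counts (fls_long() is re-implemented
here only for illustration; the kernel additionally clamps the result
to [0, DEF_PRIORITY] as shown above):

	#include <stdio.h>

	/* userspace stand-in for the kernel's fls_long() */
	static int fls_long(unsigned long x)
	{
		return x ? 64 - __builtin_clzl(x) : 0;
	}

	int main(void)
	{
		unsigned long inactive = 1UL << 21;		/* made-up: 2^21 inactive pages */
		unsigned long reclaimable = inactive / 2;	/* divided by MEMCG_NR_GENS */
		unsigned long nr_to_reclaim = 1024;

		int priority = fls_long(reclaimable) - 1 - fls_long(nr_to_reclaim - 1);

		/* prints "priority=10 scan=1024": scanning reclaimable >> 10
		 * pages is expected to meet the reclaim target */
		printf("priority=%d scan=%lu\n", priority, reclaimable >> priority);
		return 0;
	}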
@@ -5655,11 +5879,11 @@ static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq,
 
 	if (!mem_cgroup_disabled()) {
 		rcu_read_lock();
+
 		memcg = mem_cgroup_from_id(memcg_id);
-#ifdef CONFIG_MEMCG
-		if (memcg && !css_tryget(&memcg->css))
+		if (!mem_cgroup_tryget(memcg))
 			memcg = NULL;
-#endif
+
 		rcu_read_unlock();
 
 		if (!memcg)
@@ -5807,6 +6031,19 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
 }
 
 #ifdef CONFIG_MEMCG
+
+void lru_gen_init_pgdat(struct pglist_data *pgdat)
+{
+	int i, j;
+
+	spin_lock_init(&pgdat->memcg_lru.lock);
+
+	for (i = 0; i < MEMCG_NR_GENS; i++) {
+		for (j = 0; j < MEMCG_NR_BINS; j++)
+			INIT_HLIST_NULLS_HEAD(&pgdat->memcg_lru.fifo[i][j], i);
+	}
+}
+
 void lru_gen_init_memcg(struct mem_cgroup *memcg)
 {
 	INIT_LIST_HEAD(&memcg->mm_list.fifo);
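Note how lru_gen_init_pgdat() above seeds each list head with its
generation index as the nulls value: an RCU walker that ends on a nulls
marker whose value differs from the generation it started on knows the
list was rotated under it. A condensed sketch of the check (the full
version is in shrink_many() earlier in this diff):

	rcu_read_lock();
	hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) {
		/* ... shrink one memcg at a time ... */
	}
	rcu_read_unlock();

	/* the nulls value encodes the generation the list belongs to */
	if (gen != get_nulls_value(pos))
		goto restart;	/* raced with lru_gen_rotate_memcg() */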
@@ -5830,7 +6067,69 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg)
 		}
 	}
 }
-#endif
+
+void lru_gen_online_memcg(struct mem_cgroup *memcg)
+{
+	int gen;
+	int nid;
+	int bin = get_random_u32_below(MEMCG_NR_BINS);
+
+	for_each_node(nid) {
+		struct pglist_data *pgdat = NODE_DATA(nid);
+		struct lruvec *lruvec = get_lruvec(memcg, nid);
+
+		spin_lock(&pgdat->memcg_lru.lock);
+
+		VM_WARN_ON_ONCE(!hlist_nulls_unhashed(&lruvec->lrugen.list));
+
+		gen = get_memcg_gen(pgdat->memcg_lru.seq);
+
+		hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]);
+		pgdat->memcg_lru.nr_memcgs[gen]++;
+
+		lruvec->lrugen.gen = gen;
+
+		spin_unlock(&pgdat->memcg_lru.lock);
+	}
+}
+
+void lru_gen_offline_memcg(struct mem_cgroup *memcg)
+{
+	int nid;
+
+	for_each_node(nid) {
+		struct lruvec *lruvec = get_lruvec(memcg, nid);
+
+		lru_gen_rotate_memcg(lruvec, MEMCG_LRU_OLD);
+	}
+}
+
+void lru_gen_release_memcg(struct mem_cgroup *memcg)
+{
+	int gen;
+	int nid;
+
+	for_each_node(nid) {
+		struct pglist_data *pgdat = NODE_DATA(nid);
+		struct lruvec *lruvec = get_lruvec(memcg, nid);
+
+		spin_lock(&pgdat->memcg_lru.lock);
+
+		VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
+
+		gen = lruvec->lrugen.gen;
+
+		hlist_nulls_del_rcu(&lruvec->lrugen.list);
+		pgdat->memcg_lru.nr_memcgs[gen]--;
+
+		if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
+			WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
+
+		spin_unlock(&pgdat->memcg_lru.lock);
+	}
+}
+
+#endif /* CONFIG_MEMCG */
 
 static int __init init_lru_gen(void)
 {
@@ -5857,6 +6156,10 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 {
 }
 
+static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
+{
+}
+
 #endif /* CONFIG_LRU_GEN */
 
 static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
@@ -5870,7 +6173,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 	bool proportional_reclaim;
 	struct blk_plug plug;
 
-	if (lru_gen_enabled()) {
+	if (lru_gen_enabled() && !global_reclaim(sc)) {
 		lru_gen_shrink_lruvec(lruvec, sc);
 		return;
 	}
@@ -6113,6 +6416,11 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 	struct lruvec *target_lruvec;
 	bool reclaimable = false;
 
+	if (lru_gen_enabled() && global_reclaim(sc)) {
+		lru_gen_shrink_node(pgdat, sc);
+		return;
+	}
+
 	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
 
 again: