mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	mm: multi-gen LRU: per-node lru_gen_folio lists
For each node, memcgs are divided into two generations: the old and the young. For each generation, memcgs are randomly sharded into multiple bins to improve scalability. For each bin, an RCU hlist_nulls is virtually divided into three segments: the head, the tail and the default. An onlining memcg is added to the tail of a random bin in the old generation. The eviction starts at the head of a random bin in the old generation. The per-node memcg generation counter, whose remainder (mod 2) indexes the old generation, is incremented when all its bins become empty. There are four operations: 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its current generation (old or young) and updates its "seg" to "head"; 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its current generation (old or young) and updates its "seg" to "tail"; 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old generation, updates its "gen" to "old" and resets its "seg" to "default"; 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the young generation, updates its "gen" to "young" and resets its "seg" to "default". The events that trigger the above operations are: 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; 2. The first attempt to reclaim an memcg below low, which triggers MEMCG_LRU_TAIL; 3. The first attempt to reclaim an memcg below reclaimable size threshold, which triggers MEMCG_LRU_TAIL; 4. The second attempt to reclaim an memcg below reclaimable size threshold, which triggers MEMCG_LRU_YOUNG; 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG; 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; 7. Offlining an memcg, which triggers MEMCG_LRU_OLD. Note that memcg LRU only applies to global reclaim, and the round-robin incrementing of their max_seq counters ensures the eventual fairness to all eligible memcgs. 
For memcg reclaim, it still relies on mem_cgroup_iter(). Link: https://lkml.kernel.org/r/20221222041905.2431096-7-yuzhao@google.com Signed-off-by: Yu Zhao <yuzhao@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Michael Larabel <Michael@MichaelLarabel.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Mike Rapoport <rppt@kernel.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Suren Baghdasaryan <surenb@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									77d4459a4a
								
							
						
					
					
						commit
						e4dde56cd2
					
				
					 6 changed files with 500 additions and 35 deletions
				
			
		| 
						 | 
					@ -794,6 +794,11 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg)
 | 
				
			||||||
	percpu_ref_put(&objcg->refcnt);
 | 
						percpu_ref_put(&objcg->refcnt);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return !memcg || css_tryget(&memcg->css);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 | 
					static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	if (memcg)
 | 
						if (memcg)
 | 
				
			||||||
| 
						 | 
					@ -1301,6 +1306,11 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return true;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 | 
					static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -122,6 +122,18 @@ static inline bool lru_gen_in_fault(void)
 | 
				
			||||||
	return current->in_lru_fault;
 | 
						return current->in_lru_fault;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_MEMCG
 | 
				
			||||||
 | 
					static inline int lru_gen_memcg_seg(struct lruvec *lruvec)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return READ_ONCE(lruvec->lrugen.seg);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					static inline int lru_gen_memcg_seg(struct lruvec *lruvec)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline int lru_gen_from_seq(unsigned long seq)
 | 
					static inline int lru_gen_from_seq(unsigned long seq)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return seq % MAX_NR_GENS;
 | 
						return seq % MAX_NR_GENS;
 | 
				
			||||||
| 
						 | 
					@ -297,6 +309,11 @@ static inline bool lru_gen_in_fault(void)
 | 
				
			||||||
	return false;
 | 
						return false;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline int lru_gen_memcg_seg(struct lruvec *lruvec)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
 | 
					static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return false;
 | 
						return false;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -7,6 +7,7 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <linux/spinlock.h>
 | 
					#include <linux/spinlock.h>
 | 
				
			||||||
#include <linux/list.h>
 | 
					#include <linux/list.h>
 | 
				
			||||||
 | 
					#include <linux/list_nulls.h>
 | 
				
			||||||
#include <linux/wait.h>
 | 
					#include <linux/wait.h>
 | 
				
			||||||
#include <linux/bitops.h>
 | 
					#include <linux/bitops.h>
 | 
				
			||||||
#include <linux/cache.h>
 | 
					#include <linux/cache.h>
 | 
				
			||||||
| 
						 | 
					@ -367,6 +368,15 @@ struct page_vma_mapped_walk;
 | 
				
			||||||
#define LRU_GEN_MASK		((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
 | 
					#define LRU_GEN_MASK		((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
 | 
				
			||||||
#define LRU_REFS_MASK		((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
 | 
					#define LRU_REFS_MASK		((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* see the comment on MEMCG_NR_GENS */
 | 
				
			||||||
 | 
					enum {
 | 
				
			||||||
 | 
						MEMCG_LRU_NOP,
 | 
				
			||||||
 | 
						MEMCG_LRU_HEAD,
 | 
				
			||||||
 | 
						MEMCG_LRU_TAIL,
 | 
				
			||||||
 | 
						MEMCG_LRU_OLD,
 | 
				
			||||||
 | 
						MEMCG_LRU_YOUNG,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_LRU_GEN
 | 
					#ifdef CONFIG_LRU_GEN
 | 
				
			||||||
 | 
					
 | 
				
			||||||
enum {
 | 
					enum {
 | 
				
			||||||
| 
						 | 
					@ -426,6 +436,14 @@ struct lru_gen_folio {
 | 
				
			||||||
	atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
 | 
						atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
 | 
				
			||||||
	/* whether the multi-gen LRU is enabled */
 | 
						/* whether the multi-gen LRU is enabled */
 | 
				
			||||||
	bool enabled;
 | 
						bool enabled;
 | 
				
			||||||
 | 
					#ifdef CONFIG_MEMCG
 | 
				
			||||||
 | 
						/* the memcg generation this lru_gen_folio belongs to */
 | 
				
			||||||
 | 
						u8 gen;
 | 
				
			||||||
 | 
						/* the list segment this lru_gen_folio belongs to */
 | 
				
			||||||
 | 
						u8 seg;
 | 
				
			||||||
 | 
						/* per-node lru_gen_folio list for global reclaim */
 | 
				
			||||||
 | 
						struct hlist_nulls_node list;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
enum {
 | 
					enum {
 | 
				
			||||||
| 
						 | 
					@ -479,12 +497,87 @@ void lru_gen_init_lruvec(struct lruvec *lruvec);
 | 
				
			||||||
void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
 | 
					void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_MEMCG
 | 
					#ifdef CONFIG_MEMCG
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * For each node, memcgs are divided into two generations: the old and the
 | 
				
			||||||
 | 
					 * young. For each generation, memcgs are randomly sharded into multiple bins
 | 
				
			||||||
 | 
					 * to improve scalability. For each bin, the hlist_nulls is virtually divided
 | 
				
			||||||
 | 
					 * into three segments: the head, the tail and the default.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * An onlining memcg is added to the tail of a random bin in the old generation.
 | 
				
			||||||
 | 
					 * The eviction starts at the head of a random bin in the old generation. The
 | 
				
			||||||
 | 
					 * per-node memcg generation counter, whose remainder (mod MEMCG_NR_GENS) indexes
 | 
				
			||||||
 | 
					 * the old generation, is incremented when all its bins become empty.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * There are four operations:
 | 
				
			||||||
 | 
					 * 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its
 | 
				
			||||||
 | 
					 *    current generation (old or young) and updates its "seg" to "head";
 | 
				
			||||||
 | 
					 * 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its
 | 
				
			||||||
 | 
					 *    current generation (old or young) and updates its "seg" to "tail";
 | 
				
			||||||
 | 
					 * 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old
 | 
				
			||||||
 | 
					 *    generation, updates its "gen" to "old" and resets its "seg" to "default";
 | 
				
			||||||
 | 
					 * 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the
 | 
				
			||||||
 | 
					 *    young generation, updates its "gen" to "young" and resets its "seg" to
 | 
				
			||||||
 | 
					 *    "default".
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * The events that trigger the above operations are:
 | 
				
			||||||
 | 
					 * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD;
 | 
				
			||||||
 | 
					 * 2. The first attempt to reclaim an memcg below low, which triggers
 | 
				
			||||||
 | 
					 *    MEMCG_LRU_TAIL;
 | 
				
			||||||
 | 
					 * 3. The first attempt to reclaim an memcg below reclaimable size threshold,
 | 
				
			||||||
 | 
					 *    which triggers MEMCG_LRU_TAIL;
 | 
				
			||||||
 | 
					 * 4. The second attempt to reclaim an memcg below reclaimable size threshold,
 | 
				
			||||||
 | 
					 *    which triggers MEMCG_LRU_YOUNG;
 | 
				
			||||||
 | 
					 * 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG;
 | 
				
			||||||
 | 
					 * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG;
 | 
				
			||||||
 | 
					 * 7. Offlining an memcg, which triggers MEMCG_LRU_OLD.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Note that memcg LRU only applies to global reclaim, and the round-robin
 | 
				
			||||||
 | 
					 * incrementing of their max_seq counters ensures the eventual fairness to all
 | 
				
			||||||
 | 
					 * eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter().
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#define MEMCG_NR_GENS	2
 | 
				
			||||||
 | 
					#define MEMCG_NR_BINS	8
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct lru_gen_memcg {
 | 
				
			||||||
 | 
						/* the per-node memcg generation counter */
 | 
				
			||||||
 | 
						unsigned long seq;
 | 
				
			||||||
 | 
						/* each memcg has one lru_gen_folio per node */
 | 
				
			||||||
 | 
						unsigned long nr_memcgs[MEMCG_NR_GENS];
 | 
				
			||||||
 | 
						/* per-node lru_gen_folio list for global reclaim */
 | 
				
			||||||
 | 
						struct hlist_nulls_head	fifo[MEMCG_NR_GENS][MEMCG_NR_BINS];
 | 
				
			||||||
 | 
						/* protects the above */
 | 
				
			||||||
 | 
						spinlock_t lock;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void lru_gen_init_pgdat(struct pglist_data *pgdat);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void lru_gen_init_memcg(struct mem_cgroup *memcg);
 | 
					void lru_gen_init_memcg(struct mem_cgroup *memcg);
 | 
				
			||||||
void lru_gen_exit_memcg(struct mem_cgroup *memcg);
 | 
					void lru_gen_exit_memcg(struct mem_cgroup *memcg);
 | 
				
			||||||
#endif
 | 
					void lru_gen_online_memcg(struct mem_cgroup *memcg);
 | 
				
			||||||
 | 
					void lru_gen_offline_memcg(struct mem_cgroup *memcg);
 | 
				
			||||||
 | 
					void lru_gen_release_memcg(struct mem_cgroup *memcg);
 | 
				
			||||||
 | 
					void lru_gen_rotate_memcg(struct lruvec *lruvec, int op);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#else /* !CONFIG_MEMCG */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define MEMCG_NR_GENS	1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct lru_gen_memcg {
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void lru_gen_init_pgdat(struct pglist_data *pgdat)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif /* CONFIG_MEMCG */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#else /* !CONFIG_LRU_GEN */
 | 
					#else /* !CONFIG_LRU_GEN */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void lru_gen_init_pgdat(struct pglist_data *pgdat)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
 | 
					static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -494,6 +587,7 @@ static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_MEMCG
 | 
					#ifdef CONFIG_MEMCG
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
 | 
					static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -501,7 +595,24 @@ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
 | 
				
			||||||
static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg)
 | 
					static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					
 | 
				
			||||||
 | 
					static inline void lru_gen_online_memcg(struct mem_cgroup *memcg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void lru_gen_offline_memcg(struct mem_cgroup *memcg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void lru_gen_release_memcg(struct mem_cgroup *memcg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif /* CONFIG_MEMCG */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif /* CONFIG_LRU_GEN */
 | 
					#endif /* CONFIG_LRU_GEN */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1243,6 +1354,8 @@ typedef struct pglist_data {
 | 
				
			||||||
#ifdef CONFIG_LRU_GEN
 | 
					#ifdef CONFIG_LRU_GEN
 | 
				
			||||||
	/* kswap mm walk data */
 | 
						/* kswap mm walk data */
 | 
				
			||||||
	struct lru_gen_mm_walk	mm_walk;
 | 
						struct lru_gen_mm_walk	mm_walk;
 | 
				
			||||||
 | 
						/* lru_gen_folio list */
 | 
				
			||||||
 | 
						struct lru_gen_memcg memcg_lru;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	CACHELINE_PADDING(_pad2_);
 | 
						CACHELINE_PADDING(_pad2_);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -478,6 +478,16 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid)
 | 
				
			||||||
	struct mem_cgroup_per_node *mz;
 | 
						struct mem_cgroup_per_node *mz;
 | 
				
			||||||
	struct mem_cgroup_tree_per_node *mctz;
 | 
						struct mem_cgroup_tree_per_node *mctz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (lru_gen_enabled()) {
 | 
				
			||||||
 | 
							struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* see the comment on MEMCG_NR_GENS */
 | 
				
			||||||
 | 
							if (soft_limit_excess(memcg) && lru_gen_memcg_seg(lruvec) != MEMCG_LRU_HEAD)
 | 
				
			||||||
 | 
								lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mctz = soft_limit_tree.rb_tree_per_node[nid];
 | 
						mctz = soft_limit_tree.rb_tree_per_node[nid];
 | 
				
			||||||
	if (!mctz)
 | 
						if (!mctz)
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
| 
						 | 
					@ -3530,6 +3540,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 | 
				
			||||||
	struct mem_cgroup_tree_per_node *mctz;
 | 
						struct mem_cgroup_tree_per_node *mctz;
 | 
				
			||||||
	unsigned long excess;
 | 
						unsigned long excess;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (lru_gen_enabled())
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (order > 0)
 | 
						if (order > 0)
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5391,6 +5404,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 | 
				
			||||||
	if (unlikely(mem_cgroup_is_root(memcg)))
 | 
						if (unlikely(mem_cgroup_is_root(memcg)))
 | 
				
			||||||
		queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
 | 
							queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
 | 
				
			||||||
				   2UL*HZ);
 | 
									   2UL*HZ);
 | 
				
			||||||
 | 
						lru_gen_online_memcg(memcg);
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
offline_kmem:
 | 
					offline_kmem:
 | 
				
			||||||
	memcg_offline_kmem(memcg);
 | 
						memcg_offline_kmem(memcg);
 | 
				
			||||||
| 
						 | 
					@ -5422,6 +5436,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 | 
				
			||||||
	memcg_offline_kmem(memcg);
 | 
						memcg_offline_kmem(memcg);
 | 
				
			||||||
	reparent_shrinker_deferred(memcg);
 | 
						reparent_shrinker_deferred(memcg);
 | 
				
			||||||
	wb_memcg_offline(memcg);
 | 
						wb_memcg_offline(memcg);
 | 
				
			||||||
 | 
						lru_gen_offline_memcg(memcg);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	drain_all_stock(memcg);
 | 
						drain_all_stock(memcg);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5433,6 +5448,7 @@ static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
 | 
				
			||||||
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 | 
						struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	invalidate_reclaim_iterators(memcg);
 | 
						invalidate_reclaim_iterators(memcg);
 | 
				
			||||||
 | 
						lru_gen_release_memcg(memcg);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 | 
					static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -7941,6 +7941,7 @@ static void __init free_area_init_node(int nid)
 | 
				
			||||||
	pgdat_set_deferred_range(pgdat);
 | 
						pgdat_set_deferred_range(pgdat);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	free_area_init_core(pgdat);
 | 
						free_area_init_core(pgdat);
 | 
				
			||||||
 | 
						lru_gen_init_pgdat(pgdat);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void __init free_area_init_memoryless_node(int nid)
 | 
					static void __init free_area_init_memoryless_node(int nid)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										374
									
								
								mm/vmscan.c
									
									
									
									
									
								
							
							
						
						
									
										374
									
								
								mm/vmscan.c
									
									
									
									
									
								
							| 
						 | 
					@ -55,6 +55,8 @@
 | 
				
			||||||
#include <linux/ctype.h>
 | 
					#include <linux/ctype.h>
 | 
				
			||||||
#include <linux/debugfs.h>
 | 
					#include <linux/debugfs.h>
 | 
				
			||||||
#include <linux/khugepaged.h>
 | 
					#include <linux/khugepaged.h>
 | 
				
			||||||
 | 
					#include <linux/rculist_nulls.h>
 | 
				
			||||||
 | 
					#include <linux/random.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <asm/tlbflush.h>
 | 
					#include <asm/tlbflush.h>
 | 
				
			||||||
#include <asm/div64.h>
 | 
					#include <asm/div64.h>
 | 
				
			||||||
| 
						 | 
					@ -135,11 +137,6 @@ struct scan_control {
 | 
				
			||||||
	/* Always discard instead of demoting to lower tier memory */
 | 
						/* Always discard instead of demoting to lower tier memory */
 | 
				
			||||||
	unsigned int no_demotion:1;
 | 
						unsigned int no_demotion:1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_LRU_GEN
 | 
					 | 
				
			||||||
	/* help kswapd make better choices among multiple memcgs */
 | 
					 | 
				
			||||||
	unsigned long last_reclaimed;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* Allocation order */
 | 
						/* Allocation order */
 | 
				
			||||||
	s8 order;
 | 
						s8 order;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3185,6 +3182,9 @@ DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS);
 | 
				
			||||||
		for ((type) = 0; (type) < ANON_AND_FILE; (type)++)	\
 | 
							for ((type) = 0; (type) < ANON_AND_FILE; (type)++)	\
 | 
				
			||||||
			for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
 | 
								for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define get_memcg_gen(seq)	((seq) % MEMCG_NR_GENS)
 | 
				
			||||||
 | 
					#define get_memcg_bin(bin)	((bin) % MEMCG_NR_BINS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid)
 | 
					static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct pglist_data *pgdat = NODE_DATA(nid);
 | 
						struct pglist_data *pgdat = NODE_DATA(nid);
 | 
				
			||||||
| 
						 | 
					@ -4453,8 +4453,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
 | 
				
			||||||
		if (sc->priority <= DEF_PRIORITY - 2)
 | 
							if (sc->priority <= DEF_PRIORITY - 2)
 | 
				
			||||||
			wait_event_killable(lruvec->mm_state.wait,
 | 
								wait_event_killable(lruvec->mm_state.wait,
 | 
				
			||||||
					    max_seq < READ_ONCE(lrugen->max_seq));
 | 
										    max_seq < READ_ONCE(lrugen->max_seq));
 | 
				
			||||||
 | 
							return false;
 | 
				
			||||||
		return max_seq < READ_ONCE(lrugen->max_seq);
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
 | 
						VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
 | 
				
			||||||
| 
						 | 
					@ -4527,8 +4526,6 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	VM_WARN_ON_ONCE(!current_is_kswapd());
 | 
						VM_WARN_ON_ONCE(!current_is_kswapd());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	sc->last_reclaimed = sc->nr_reclaimed;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* check the order to exclude compaction-induced reclaim */
 | 
						/* check the order to exclude compaction-induced reclaim */
 | 
				
			||||||
	if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
 | 
						if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
| 
						 | 
					@ -5117,8 +5114,7 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
 | 
				
			||||||
 * 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
 | 
					 * 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
 | 
				
			||||||
 *    reclaim.
 | 
					 *    reclaim.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
 | 
					static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool can_swap)
 | 
				
			||||||
				    bool can_swap)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long nr_to_scan;
 | 
						unsigned long nr_to_scan;
 | 
				
			||||||
	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 | 
						struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 | 
				
			||||||
| 
						 | 
					@ -5136,10 +5132,8 @@ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *
 | 
				
			||||||
	if (sc->priority == DEF_PRIORITY)
 | 
						if (sc->priority == DEF_PRIORITY)
 | 
				
			||||||
		return nr_to_scan;
 | 
							return nr_to_scan;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* skip this lruvec as it's low on cold folios */
 | 
						/* skip this lruvec as it's low on cold folios */
 | 
				
			||||||
	return 0;
 | 
						return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static unsigned long get_nr_to_reclaim(struct scan_control *sc)
 | 
					static unsigned long get_nr_to_reclaim(struct scan_control *sc)
 | 
				
			||||||
| 
						 | 
					@ -5148,29 +5142,18 @@ static unsigned long get_nr_to_reclaim(struct scan_control *sc)
 | 
				
			||||||
	if (!global_reclaim(sc))
 | 
						if (!global_reclaim(sc))
 | 
				
			||||||
		return -1;
 | 
							return -1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* discount the previous progress for kswapd */
 | 
					 | 
				
			||||||
	if (current_is_kswapd())
 | 
					 | 
				
			||||||
		return sc->nr_to_reclaim + sc->last_reclaimed;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return max(sc->nr_to_reclaim, compact_gap(sc->order));
 | 
						return max(sc->nr_to_reclaim, compact_gap(sc->order));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 | 
					static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct blk_plug plug;
 | 
						long nr_to_scan;
 | 
				
			||||||
	unsigned long scanned = 0;
 | 
						unsigned long scanned = 0;
 | 
				
			||||||
	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
 | 
						unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	lru_add_drain();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	blk_start_plug(&plug);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	set_mm_walk(lruvec_pgdat(lruvec));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	while (true) {
 | 
						while (true) {
 | 
				
			||||||
		int delta;
 | 
							int delta;
 | 
				
			||||||
		int swappiness;
 | 
							int swappiness;
 | 
				
			||||||
		unsigned long nr_to_scan;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (sc->may_swap)
 | 
							if (sc->may_swap)
 | 
				
			||||||
			swappiness = get_swappiness(lruvec, sc);
 | 
								swappiness = get_swappiness(lruvec, sc);
 | 
				
			||||||
| 
						 | 
					@ -5180,7 +5163,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
 | 
				
			||||||
			swappiness = 0;
 | 
								swappiness = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
 | 
							nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
 | 
				
			||||||
		if (!nr_to_scan)
 | 
							if (nr_to_scan <= 0)
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		delta = evict_folios(lruvec, sc, swappiness);
 | 
							delta = evict_folios(lruvec, sc, swappiness);
 | 
				
			||||||
| 
						 | 
					@ -5197,11 +5180,252 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
 | 
				
			||||||
		cond_resched();
 | 
							cond_resched();
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* whether try_to_inc_max_seq() was successful */
 | 
				
			||||||
 | 
						return nr_to_scan < 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						bool success;
 | 
				
			||||||
 | 
						unsigned long scanned = sc->nr_scanned;
 | 
				
			||||||
 | 
						unsigned long reclaimed = sc->nr_reclaimed;
 | 
				
			||||||
 | 
						int seg = lru_gen_memcg_seg(lruvec);
 | 
				
			||||||
 | 
						struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 | 
				
			||||||
 | 
						struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* see the comment on MEMCG_NR_GENS */
 | 
				
			||||||
 | 
						if (!lruvec_is_sizable(lruvec, sc))
 | 
				
			||||||
 | 
							return seg != MEMCG_LRU_TAIL ? MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						mem_cgroup_calculate_protection(NULL, memcg);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (mem_cgroup_below_min(NULL, memcg))
 | 
				
			||||||
 | 
							return MEMCG_LRU_YOUNG;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (mem_cgroup_below_low(NULL, memcg)) {
 | 
				
			||||||
 | 
							/* see the comment on MEMCG_NR_GENS */
 | 
				
			||||||
 | 
							if (seg != MEMCG_LRU_TAIL)
 | 
				
			||||||
 | 
								return MEMCG_LRU_TAIL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							memcg_memory_event(memcg, MEMCG_LOW);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						success = try_to_shrink_lruvec(lruvec, sc);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!sc->proactive)
 | 
				
			||||||
 | 
							vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned,
 | 
				
			||||||
 | 
								   sc->nr_reclaimed - reclaimed);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sc->nr_reclaimed += current->reclaim_state->reclaimed_slab;
 | 
				
			||||||
 | 
						current->reclaim_state->reclaimed_slab = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return success ? MEMCG_LRU_YOUNG : 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_MEMCG
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int gen;
 | 
				
			||||||
 | 
						int bin;
 | 
				
			||||||
 | 
						int first_bin;
 | 
				
			||||||
 | 
						struct lruvec *lruvec;
 | 
				
			||||||
 | 
						struct lru_gen_folio *lrugen;
 | 
				
			||||||
 | 
						const struct hlist_nulls_node *pos;
 | 
				
			||||||
 | 
						int op = 0;
 | 
				
			||||||
 | 
						struct mem_cgroup *memcg = NULL;
 | 
				
			||||||
 | 
						unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						bin = first_bin = get_random_u32_below(MEMCG_NR_BINS);
 | 
				
			||||||
 | 
					restart:
 | 
				
			||||||
 | 
						gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						rcu_read_lock();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) {
 | 
				
			||||||
 | 
							if (op)
 | 
				
			||||||
 | 
								lru_gen_rotate_memcg(lruvec, op);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							mem_cgroup_put(memcg);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							lruvec = container_of(lrugen, struct lruvec, lrugen);
 | 
				
			||||||
 | 
							memcg = lruvec_memcg(lruvec);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (!mem_cgroup_tryget(memcg)) {
 | 
				
			||||||
 | 
								op = 0;
 | 
				
			||||||
 | 
								memcg = NULL;
 | 
				
			||||||
 | 
								continue;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							rcu_read_unlock();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							op = shrink_one(lruvec, sc);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (sc->nr_reclaimed >= nr_to_reclaim)
 | 
				
			||||||
 | 
								goto success;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							rcu_read_lock();
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						rcu_read_unlock();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* restart if raced with lru_gen_rotate_memcg() */
 | 
				
			||||||
 | 
						if (gen != get_nulls_value(pos))
 | 
				
			||||||
 | 
							goto restart;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* try the rest of the bins of the current generation */
 | 
				
			||||||
 | 
						bin = get_memcg_bin(bin + 1);
 | 
				
			||||||
 | 
						if (bin != first_bin)
 | 
				
			||||||
 | 
							goto restart;
 | 
				
			||||||
 | 
					success:
 | 
				
			||||||
 | 
						if (op)
 | 
				
			||||||
 | 
							lru_gen_rotate_memcg(lruvec, op);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						mem_cgroup_put(memcg);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct blk_plug plug;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						VM_WARN_ON_ONCE(global_reclaim(sc));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						lru_add_drain();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						blk_start_plug(&plug);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						set_mm_walk(lruvec_pgdat(lruvec));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (try_to_shrink_lruvec(lruvec, sc))
 | 
				
			||||||
 | 
							lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	clear_mm_walk();
 | 
						clear_mm_walk();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	blk_finish_plug(&plug);
 | 
						blk_finish_plug(&plug);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#else /* !CONFIG_MEMCG */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						BUILD_BUG();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						BUILD_BUG();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void set_initial_priority(struct pglist_data *pgdat, struct scan_control *sc)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int priority;
 | 
				
			||||||
 | 
						unsigned long reclaimable;
 | 
				
			||||||
 | 
						struct lruvec *lruvec = mem_cgroup_lruvec(NULL, pgdat);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH)
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Determine the initial priority based on ((total / MEMCG_NR_GENS) >>
 | 
				
			||||||
 | 
						 * priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, where the
 | 
				
			||||||
 | 
						 * estimated reclaimed_to_scanned_ratio = inactive / total.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE);
 | 
				
			||||||
 | 
						if (get_swappiness(lruvec, sc))
 | 
				
			||||||
 | 
							reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						reclaimable /= MEMCG_NR_GENS;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* round down reclaimable and round up sc->nr_to_reclaim */
 | 
				
			||||||
 | 
						priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sc->priority = clamp(priority, 0, DEF_PRIORITY);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct blk_plug plug;
 | 
				
			||||||
 | 
						unsigned long reclaimed = sc->nr_reclaimed;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						VM_WARN_ON_ONCE(!global_reclaim(sc));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						lru_add_drain();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						blk_start_plug(&plug);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						set_mm_walk(pgdat);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						set_initial_priority(pgdat, sc);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (current_is_kswapd())
 | 
				
			||||||
 | 
							sc->nr_reclaimed = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (mem_cgroup_disabled())
 | 
				
			||||||
 | 
							shrink_one(&pgdat->__lruvec, sc);
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							shrink_many(pgdat, sc);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (current_is_kswapd())
 | 
				
			||||||
 | 
							sc->nr_reclaimed += reclaimed;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						clear_mm_walk();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						blk_finish_plug(&plug);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* kswapd should never fail */
 | 
				
			||||||
 | 
						pgdat->kswapd_failures = 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_MEMCG
 | 
				
			||||||
 | 
					void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int seg;
 | 
				
			||||||
 | 
						int old, new;
 | 
				
			||||||
 | 
						int bin = get_random_u32_below(MEMCG_NR_BINS);
 | 
				
			||||||
 | 
						struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock(&pgdat->memcg_lru.lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						seg = 0;
 | 
				
			||||||
 | 
						new = old = lruvec->lrugen.gen;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* see the comment on MEMCG_NR_GENS */
 | 
				
			||||||
 | 
						if (op == MEMCG_LRU_HEAD)
 | 
				
			||||||
 | 
							seg = MEMCG_LRU_HEAD;
 | 
				
			||||||
 | 
						else if (op == MEMCG_LRU_TAIL)
 | 
				
			||||||
 | 
							seg = MEMCG_LRU_TAIL;
 | 
				
			||||||
 | 
						else if (op == MEMCG_LRU_OLD)
 | 
				
			||||||
 | 
							new = get_memcg_gen(pgdat->memcg_lru.seq);
 | 
				
			||||||
 | 
						else if (op == MEMCG_LRU_YOUNG)
 | 
				
			||||||
 | 
							new = get_memcg_gen(pgdat->memcg_lru.seq + 1);
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							VM_WARN_ON_ONCE(true);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						hlist_nulls_del_rcu(&lruvec->lrugen.list);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD)
 | 
				
			||||||
 | 
							hlist_nulls_add_head_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						pgdat->memcg_lru.nr_memcgs[old]--;
 | 
				
			||||||
 | 
						pgdat->memcg_lru.nr_memcgs[new]++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						lruvec->lrugen.gen = new;
 | 
				
			||||||
 | 
						WRITE_ONCE(lruvec->lrugen.seg, seg);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq))
 | 
				
			||||||
 | 
							WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_unlock(&pgdat->memcg_lru.lock);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/******************************************************************************
 | 
					/******************************************************************************
 | 
				
			||||||
 *                          state change
 | 
					 *                          state change
 | 
				
			||||||
 ******************************************************************************/
 | 
					 ******************************************************************************/
 | 
				
			||||||
| 
						 | 
					@ -5655,11 +5879,11 @@ static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!mem_cgroup_disabled()) {
 | 
						if (!mem_cgroup_disabled()) {
 | 
				
			||||||
		rcu_read_lock();
 | 
							rcu_read_lock();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		memcg = mem_cgroup_from_id(memcg_id);
 | 
							memcg = mem_cgroup_from_id(memcg_id);
 | 
				
			||||||
#ifdef CONFIG_MEMCG
 | 
							if (!mem_cgroup_tryget(memcg))
 | 
				
			||||||
		if (memcg && !css_tryget(&memcg->css))
 | 
					 | 
				
			||||||
			memcg = NULL;
 | 
								memcg = NULL;
 | 
				
			||||||
#endif
 | 
					
 | 
				
			||||||
		rcu_read_unlock();
 | 
							rcu_read_unlock();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (!memcg)
 | 
							if (!memcg)
 | 
				
			||||||
| 
						 | 
					@ -5807,6 +6031,19 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_MEMCG
 | 
					#ifdef CONFIG_MEMCG
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void lru_gen_init_pgdat(struct pglist_data *pgdat)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int i, j;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock_init(&pgdat->memcg_lru.lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (i = 0; i < MEMCG_NR_GENS; i++) {
 | 
				
			||||||
 | 
							for (j = 0; j < MEMCG_NR_BINS; j++)
 | 
				
			||||||
 | 
								INIT_HLIST_NULLS_HEAD(&pgdat->memcg_lru.fifo[i][j], i);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void lru_gen_init_memcg(struct mem_cgroup *memcg)
 | 
					void lru_gen_init_memcg(struct mem_cgroup *memcg)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	INIT_LIST_HEAD(&memcg->mm_list.fifo);
 | 
						INIT_LIST_HEAD(&memcg->mm_list.fifo);
 | 
				
			||||||
| 
						 | 
					@ -5830,7 +6067,69 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					
 | 
				
			||||||
 | 
					void lru_gen_online_memcg(struct mem_cgroup *memcg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int gen;
 | 
				
			||||||
 | 
						int nid;
 | 
				
			||||||
 | 
						int bin = get_random_u32_below(MEMCG_NR_BINS);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for_each_node(nid) {
 | 
				
			||||||
 | 
							struct pglist_data *pgdat = NODE_DATA(nid);
 | 
				
			||||||
 | 
							struct lruvec *lruvec = get_lruvec(memcg, nid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							spin_lock(&pgdat->memcg_lru.lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							VM_WARN_ON_ONCE(!hlist_nulls_unhashed(&lruvec->lrugen.list));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							gen = get_memcg_gen(pgdat->memcg_lru.seq);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]);
 | 
				
			||||||
 | 
							pgdat->memcg_lru.nr_memcgs[gen]++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							lruvec->lrugen.gen = gen;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							spin_unlock(&pgdat->memcg_lru.lock);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void lru_gen_offline_memcg(struct mem_cgroup *memcg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int nid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for_each_node(nid) {
 | 
				
			||||||
 | 
							struct lruvec *lruvec = get_lruvec(memcg, nid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							lru_gen_rotate_memcg(lruvec, MEMCG_LRU_OLD);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void lru_gen_release_memcg(struct mem_cgroup *memcg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int gen;
 | 
				
			||||||
 | 
						int nid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for_each_node(nid) {
 | 
				
			||||||
 | 
							struct pglist_data *pgdat = NODE_DATA(nid);
 | 
				
			||||||
 | 
							struct lruvec *lruvec = get_lruvec(memcg, nid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							spin_lock(&pgdat->memcg_lru.lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							gen = lruvec->lrugen.gen;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							hlist_nulls_del_rcu(&lruvec->lrugen.list);
 | 
				
			||||||
 | 
							pgdat->memcg_lru.nr_memcgs[gen]--;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
 | 
				
			||||||
 | 
								WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							spin_unlock(&pgdat->memcg_lru.lock);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif /* CONFIG_MEMCG */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int __init init_lru_gen(void)
 | 
					static int __init init_lru_gen(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -5857,6 +6156,10 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif /* CONFIG_LRU_GEN */
 | 
					#endif /* CONFIG_LRU_GEN */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 | 
					static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 | 
				
			||||||
| 
						 | 
					@ -5870,7 +6173,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 | 
				
			||||||
	bool proportional_reclaim;
 | 
						bool proportional_reclaim;
 | 
				
			||||||
	struct blk_plug plug;
 | 
						struct blk_plug plug;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (lru_gen_enabled()) {
 | 
						if (lru_gen_enabled() && !global_reclaim(sc)) {
 | 
				
			||||||
		lru_gen_shrink_lruvec(lruvec, sc);
 | 
							lru_gen_shrink_lruvec(lruvec, sc);
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -6113,6 +6416,11 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 | 
				
			||||||
	struct lruvec *target_lruvec;
 | 
						struct lruvec *target_lruvec;
 | 
				
			||||||
	bool reclaimable = false;
 | 
						bool reclaimable = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (lru_gen_enabled() && global_reclaim(sc)) {
 | 
				
			||||||
 | 
							lru_gen_shrink_node(pgdat, sc);
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
 | 
						target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
again:
 | 
					again:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue