mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	mm: multi-gen LRU: debugfs interface
Add /sys/kernel/debug/lru_gen for working set estimation and proactive reclaim. These techniques are commonly used to optimize job scheduling (bin packing) in data centers [1][2]. Compared with the page table-based approach and the PFN-based approach, this lruvec-based approach has the following advantages: 1. It offers better choices because it is aware of memcgs, NUMA nodes, shared mappings and unmapped page cache. 2. It is more scalable because it is O(nr_hot_pages), whereas the PFN-based approach is O(nr_total_pages). Add /sys/kernel/debug/lru_gen_full for debugging. [1] https://dl.acm.org/doi/10.1145/3297858.3304053 [2] https://dl.acm.org/doi/10.1145/3503222.3507731 Link: https://lkml.kernel.org/r/20220918080010.2920238-13-yuzhao@google.com Signed-off-by: Yu Zhao <yuzhao@google.com> Reviewed-by: Qi Zheng <zhengqi.arch@bytedance.com> Acked-by: Brian Geffon <bgeffon@google.com> Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org> Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name> Acked-by: Steven Barrett <steven@liquorix.net> Acked-by: Suleiman Souhlal <suleiman@google.com> Tested-by: Daniel Byrne <djbyrne@mtu.edu> Tested-by: Donald Carr <d@chaos-reins.com> Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com> Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru> Tested-by: Shuang Zhai <szhai2@cs.rochester.edu> Tested-by: Sofia Trinh <sofia.trinh@edi.works> Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Cc: Barry Song <baohua@kernel.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Hillf Danton <hdanton@sina.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Matthew Wilcox <willy@infradead.org> Cc: Mel Gorman <mgorman@suse.de> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Michael Larabel 
<Michael@MichaelLarabel.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Mike Rapoport <rppt@kernel.org> Cc: Mike Rapoport <rppt@linux.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Tejun Heo <tj@kernel.org> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									1332a809d9
								
							
						
					
					
						commit
						d6c3af7d8a
					
				
					 2 changed files with 402 additions and 10 deletions
				
			
		| 
						 | 
					@ -493,6 +493,7 @@ static inline int num_node_state(enum node_states state)
 | 
				
			||||||
#define first_online_node	0
 | 
					#define first_online_node	0
 | 
				
			||||||
#define first_memory_node	0
 | 
					#define first_memory_node	0
 | 
				
			||||||
#define next_online_node(nid)	(MAX_NUMNODES)
 | 
					#define next_online_node(nid)	(MAX_NUMNODES)
 | 
				
			||||||
 | 
					#define next_memory_node(nid)	(MAX_NUMNODES)
 | 
				
			||||||
#define nr_node_ids		1U
 | 
					#define nr_node_ids		1U
 | 
				
			||||||
#define nr_online_nodes		1U
 | 
					#define nr_online_nodes		1U
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										411
									
								
								mm/vmscan.c
									
									
									
									
									
								
							
							
						
						
									
										411
									
								
								mm/vmscan.c
									
									
									
									
									
								
							| 
						 | 
					@ -52,6 +52,7 @@
 | 
				
			||||||
#include <linux/pagewalk.h>
 | 
					#include <linux/pagewalk.h>
 | 
				
			||||||
#include <linux/shmem_fs.h>
 | 
					#include <linux/shmem_fs.h>
 | 
				
			||||||
#include <linux/ctype.h>
 | 
					#include <linux/ctype.h>
 | 
				
			||||||
 | 
					#include <linux/debugfs.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <asm/tlbflush.h>
 | 
					#include <asm/tlbflush.h>
 | 
				
			||||||
#include <asm/div64.h>
 | 
					#include <asm/div64.h>
 | 
				
			||||||
| 
						 | 
					@ -4197,12 +4198,40 @@ static void clear_mm_walk(void)
 | 
				
			||||||
		kfree(walk);
 | 
							kfree(walk);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void inc_min_seq(struct lruvec *lruvec, int type)
 | 
					static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						int zone;
 | 
				
			||||||
 | 
						int remaining = MAX_LRU_BATCH;
 | 
				
			||||||
	struct lru_gen_struct *lrugen = &lruvec->lrugen;
 | 
						struct lru_gen_struct *lrugen = &lruvec->lrugen;
 | 
				
			||||||
 | 
						int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (type == LRU_GEN_ANON && !can_swap)
 | 
				
			||||||
 | 
							goto done;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* prevent cold/hot inversion if force_scan is true */
 | 
				
			||||||
 | 
						for (zone = 0; zone < MAX_NR_ZONES; zone++) {
 | 
				
			||||||
 | 
							struct list_head *head = &lrugen->lists[old_gen][type][zone];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							while (!list_empty(head)) {
 | 
				
			||||||
 | 
								struct folio *folio = lru_to_folio(head);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
 | 
				
			||||||
 | 
								VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
 | 
				
			||||||
 | 
								VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
 | 
				
			||||||
 | 
								VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								new_gen = folio_inc_gen(lruvec, folio, false);
 | 
				
			||||||
 | 
								list_move_tail(&folio->lru, &lrugen->lists[new_gen][type][zone]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								if (!--remaining)
 | 
				
			||||||
 | 
									return false;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					done:
 | 
				
			||||||
	reset_ctrl_pos(lruvec, type, true);
 | 
						reset_ctrl_pos(lruvec, type, true);
 | 
				
			||||||
	WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1);
 | 
						WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return true;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
 | 
					static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
 | 
				
			||||||
| 
						 | 
					@ -4248,7 +4277,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
 | 
				
			||||||
	return success;
 | 
						return success;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void inc_max_seq(struct lruvec *lruvec, bool can_swap)
 | 
					static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int prev, next;
 | 
						int prev, next;
 | 
				
			||||||
	int type, zone;
 | 
						int type, zone;
 | 
				
			||||||
| 
						 | 
					@ -4262,9 +4291,13 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap)
 | 
				
			||||||
		if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
 | 
							if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		VM_WARN_ON_ONCE(type == LRU_GEN_FILE || can_swap);
 | 
							VM_WARN_ON_ONCE(!force_scan && (type == LRU_GEN_FILE || can_swap));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		inc_min_seq(lruvec, type);
 | 
							while (!inc_min_seq(lruvec, type, can_swap)) {
 | 
				
			||||||
 | 
								spin_unlock_irq(&lruvec->lru_lock);
 | 
				
			||||||
 | 
								cond_resched();
 | 
				
			||||||
 | 
								spin_lock_irq(&lruvec->lru_lock);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
| 
						 | 
					@ -4301,7 +4334,7 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
 | 
					static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
 | 
				
			||||||
			       struct scan_control *sc, bool can_swap)
 | 
								       struct scan_control *sc, bool can_swap, bool force_scan)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	bool success;
 | 
						bool success;
 | 
				
			||||||
	struct lru_gen_mm_walk *walk;
 | 
						struct lru_gen_mm_walk *walk;
 | 
				
			||||||
| 
						 | 
					@ -4322,7 +4355,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
 | 
				
			||||||
	 * handful of PTEs. Spreading the work out over a period of time usually
 | 
						 * handful of PTEs. Spreading the work out over a period of time usually
 | 
				
			||||||
	 * is less efficient, but it avoids bursty page faults.
 | 
						 * is less efficient, but it avoids bursty page faults.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (!(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
 | 
						if (!force_scan && !(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
 | 
				
			||||||
		success = iterate_mm_list_nowalk(lruvec, max_seq);
 | 
							success = iterate_mm_list_nowalk(lruvec, max_seq);
 | 
				
			||||||
		goto done;
 | 
							goto done;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -4336,7 +4369,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
 | 
				
			||||||
	walk->lruvec = lruvec;
 | 
						walk->lruvec = lruvec;
 | 
				
			||||||
	walk->max_seq = max_seq;
 | 
						walk->max_seq = max_seq;
 | 
				
			||||||
	walk->can_swap = can_swap;
 | 
						walk->can_swap = can_swap;
 | 
				
			||||||
	walk->force_scan = false;
 | 
						walk->force_scan = force_scan;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	do {
 | 
						do {
 | 
				
			||||||
		success = iterate_mm_list(lruvec, walk, &mm);
 | 
							success = iterate_mm_list(lruvec, walk, &mm);
 | 
				
			||||||
| 
						 | 
					@ -4356,7 +4389,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
 | 
						VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	inc_max_seq(lruvec, can_swap);
 | 
						inc_max_seq(lruvec, can_swap, force_scan);
 | 
				
			||||||
	/* either this sees any waiters or they will see updated max_seq */
 | 
						/* either this sees any waiters or they will see updated max_seq */
 | 
				
			||||||
	if (wq_has_sleeper(&lruvec->mm_state.wait))
 | 
						if (wq_has_sleeper(&lruvec->mm_state.wait))
 | 
				
			||||||
		wake_up_all(&lruvec->mm_state.wait);
 | 
							wake_up_all(&lruvec->mm_state.wait);
 | 
				
			||||||
| 
						 | 
					@ -4454,7 +4487,7 @@ static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (need_aging)
 | 
						if (need_aging)
 | 
				
			||||||
		try_to_inc_max_seq(lruvec, max_seq, sc, swappiness);
 | 
							try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return true;
 | 
						return true;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -5013,7 +5046,7 @@ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *
 | 
				
			||||||
	if (current_is_kswapd())
 | 
						if (current_is_kswapd())
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap))
 | 
						if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false))
 | 
				
			||||||
		return nr_to_scan;
 | 
							return nr_to_scan;
 | 
				
			||||||
done:
 | 
					done:
 | 
				
			||||||
	return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
 | 
						return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
 | 
				
			||||||
| 
						 | 
					@ -5352,6 +5385,361 @@ static struct attribute_group lru_gen_attr_group = {
 | 
				
			||||||
	.attrs = lru_gen_attrs,
 | 
						.attrs = lru_gen_attrs,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/******************************************************************************
 | 
				
			||||||
 | 
					 *                          debugfs interface
 | 
				
			||||||
 | 
					 ******************************************************************************/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * seq_file ->start() callback.
					 *
					 * Allocates a PATH_MAX-byte buffer in m->private (lru_gen_seq_show() writes
					 * the cgroup path into it and lru_gen_seq_stop() frees it), then walks every
					 * (memcg, N_MEMORY node) pair in iteration order and returns the lruvec found
					 * at index *pos.
					 *
					 * Returns ERR_PTR(-ENOMEM) when the buffer cannot be allocated and NULL when
					 * *pos lies beyond the last pair.
					 */
					static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos)
					{
						loff_t skip = *pos;
						struct mem_cgroup *memcg;

						m->private = kvmalloc(PATH_MAX, GFP_KERNEL);
						if (!m->private)
							return ERR_PTR(-ENOMEM);

						memcg = mem_cgroup_iter(NULL, NULL, NULL);
						for (;;) {
							int nid;

							/* the nodes are visited at least once, even if memcg is NULL */
							for_each_node_state(nid, N_MEMORY) {
								if (skip == 0)
									return get_lruvec(memcg, nid);
								skip--;
							}

							memcg = mem_cgroup_iter(NULL, memcg, NULL);
							if (!memcg)
								break;
						}

						return NULL;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void lru_gen_seq_stop(struct seq_file *m, void *v)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (!IS_ERR_OR_NULL(v))
 | 
				
			||||||
 | 
							mem_cgroup_iter_break(NULL, lruvec_memcg(v));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						kvfree(m->private);
 | 
				
			||||||
 | 
						m->private = NULL;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * seq_file ->next() callback.
					 *
					 * Advances *pos and moves from the lruvec @v to the next one: the next
					 * memory node of the same memcg, or the first memory node of the next memcg
					 * once the nodes are exhausted. Returns NULL when there is no next memcg.
					 */
					static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos)
					{
						struct mem_cgroup *memcg = lruvec_memcg(v);
						int next_nid = next_memory_node(lruvec_pgdat(v)->node_id);

						++*pos;

						/* more nodes left for the current memcg */
						if (next_nid != MAX_NUMNODES)
							return get_lruvec(memcg, next_nid);

						memcg = mem_cgroup_iter(NULL, memcg, NULL);
						if (!memcg)
							return NULL;

						return get_lruvec(memcg, first_memory_node);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
 | 
				
			||||||
 | 
									  unsigned long max_seq, unsigned long *min_seq,
 | 
				
			||||||
 | 
									  unsigned long seq)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
						int type, tier;
 | 
				
			||||||
 | 
						int hist = lru_hist_from_seq(seq);
 | 
				
			||||||
 | 
						struct lru_gen_struct *lrugen = &lruvec->lrugen;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (tier = 0; tier < MAX_NR_TIERS; tier++) {
 | 
				
			||||||
 | 
							seq_printf(m, "            %10d", tier);
 | 
				
			||||||
 | 
							for (type = 0; type < ANON_AND_FILE; type++) {
 | 
				
			||||||
 | 
								const char *s = "   ";
 | 
				
			||||||
 | 
								unsigned long n[3] = {};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								if (seq == max_seq) {
 | 
				
			||||||
 | 
									s = "RT ";
 | 
				
			||||||
 | 
									n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]);
 | 
				
			||||||
 | 
									n[1] = READ_ONCE(lrugen->avg_total[type][tier]);
 | 
				
			||||||
 | 
								} else if (seq == min_seq[type] || NR_HIST_GENS > 1) {
 | 
				
			||||||
 | 
									s = "rep";
 | 
				
			||||||
 | 
									n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]);
 | 
				
			||||||
 | 
									n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]);
 | 
				
			||||||
 | 
									if (tier)
 | 
				
			||||||
 | 
										n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]);
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								for (i = 0; i < 3; i++)
 | 
				
			||||||
 | 
									seq_printf(m, " %10lu%c", n[i], s[i]);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							seq_putc(m, '\n');
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						seq_puts(m, "                      ");
 | 
				
			||||||
 | 
						for (i = 0; i < NR_MM_STATS; i++) {
 | 
				
			||||||
 | 
							const char *s = "      ";
 | 
				
			||||||
 | 
							unsigned long n = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (seq == max_seq && NR_HIST_GENS == 1) {
 | 
				
			||||||
 | 
								s = "LOYNFA";
 | 
				
			||||||
 | 
								n = READ_ONCE(lruvec->mm_state.stats[hist][i]);
 | 
				
			||||||
 | 
							} else if (seq != max_seq && NR_HIST_GENS > 1) {
 | 
				
			||||||
 | 
								s = "loynfa";
 | 
				
			||||||
 | 
								n = READ_ONCE(lruvec->mm_state.stats[hist][i]);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							seq_printf(m, " %10lu%c", n, s[i]);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						seq_putc(m, '\n');
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int lru_gen_seq_show(struct seq_file *m, void *v)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned long seq;
 | 
				
			||||||
 | 
						bool full = !debugfs_real_fops(m->file)->write;
 | 
				
			||||||
 | 
						struct lruvec *lruvec = v;
 | 
				
			||||||
 | 
						struct lru_gen_struct *lrugen = &lruvec->lrugen;
 | 
				
			||||||
 | 
						int nid = lruvec_pgdat(lruvec)->node_id;
 | 
				
			||||||
 | 
						struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 | 
				
			||||||
 | 
						DEFINE_MAX_SEQ(lruvec);
 | 
				
			||||||
 | 
						DEFINE_MIN_SEQ(lruvec);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (nid == first_memory_node) {
 | 
				
			||||||
 | 
							const char *path = memcg ? m->private : "";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_MEMCG
 | 
				
			||||||
 | 
							if (memcg)
 | 
				
			||||||
 | 
								cgroup_path(memcg->css.cgroup, m->private, PATH_MAX);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
							seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), path);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						seq_printf(m, " node %5d\n", nid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!full)
 | 
				
			||||||
 | 
							seq = min_seq[LRU_GEN_ANON];
 | 
				
			||||||
 | 
						else if (max_seq >= MAX_NR_GENS)
 | 
				
			||||||
 | 
							seq = max_seq - MAX_NR_GENS + 1;
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							seq = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (; seq <= max_seq; seq++) {
 | 
				
			||||||
 | 
							int type, zone;
 | 
				
			||||||
 | 
							int gen = lru_gen_from_seq(seq);
 | 
				
			||||||
 | 
							unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							seq_printf(m, " %10lu %10u", seq, jiffies_to_msecs(jiffies - birth));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							for (type = 0; type < ANON_AND_FILE; type++) {
 | 
				
			||||||
 | 
								unsigned long size = 0;
 | 
				
			||||||
 | 
								char mark = full && seq < min_seq[type] ? 'x' : ' ';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								for (zone = 0; zone < MAX_NR_ZONES; zone++)
 | 
				
			||||||
 | 
									size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								seq_printf(m, " %10lu%c", size, mark);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							seq_putc(m, '\n');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (full)
 | 
				
			||||||
 | 
								lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static const struct seq_operations lru_gen_seq_ops = {
 | 
				
			||||||
 | 
						.start = lru_gen_seq_start,
 | 
				
			||||||
 | 
						.stop = lru_gen_seq_stop,
 | 
				
			||||||
 | 
						.next = lru_gen_seq_next,
 | 
				
			||||||
 | 
						.show = lru_gen_seq_show,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Handle the '+' debugfs command: try to create a new youngest generation.
					 *
					 * @seq must name the current max_seq. An older value is accepted and does
					 * nothing (returns 0); a larger one is rejected with -EINVAL. Unless
					 * @force_scan is set, -ERANGE is returned when
					 * min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq.
					 *
					 * Returns 0 once try_to_inc_max_seq() has been called; its result is not
					 * propagated.
					 */
					static int run_aging(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
							     bool can_swap, bool force_scan)
					{
						DEFINE_MAX_SEQ(lruvec);
						DEFINE_MIN_SEQ(lruvec);

						if (seq != max_seq)
							return seq < max_seq ? 0 : -EINVAL;

						if (min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq && !force_scan)
							return -ERANGE;

						try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, force_scan);

						return 0;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
 | 
				
			||||||
 | 
								int swappiness, unsigned long nr_to_reclaim)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						DEFINE_MAX_SEQ(lruvec);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (seq + MIN_NR_GENS > max_seq)
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sc->nr_reclaimed = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						while (!signal_pending(current)) {
 | 
				
			||||||
 | 
							DEFINE_MIN_SEQ(lruvec);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (seq < min_seq[!swappiness])
 | 
				
			||||||
 | 
								return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (sc->nr_reclaimed >= nr_to_reclaim)
 | 
				
			||||||
 | 
								return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (!evict_folios(lruvec, sc, swappiness, NULL))
 | 
				
			||||||
 | 
								return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							cond_resched();
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return -EINTR;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq,
 | 
				
			||||||
 | 
							   struct scan_control *sc, int swappiness, unsigned long opt)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct lruvec *lruvec;
 | 
				
			||||||
 | 
						int err = -EINVAL;
 | 
				
			||||||
 | 
						struct mem_cgroup *memcg = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY))
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!mem_cgroup_disabled()) {
 | 
				
			||||||
 | 
							rcu_read_lock();
 | 
				
			||||||
 | 
							memcg = mem_cgroup_from_id(memcg_id);
 | 
				
			||||||
 | 
					#ifdef CONFIG_MEMCG
 | 
				
			||||||
 | 
							if (memcg && !css_tryget(&memcg->css))
 | 
				
			||||||
 | 
								memcg = NULL;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
							rcu_read_unlock();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (!memcg)
 | 
				
			||||||
 | 
								return -EINVAL;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (memcg_id != mem_cgroup_id(memcg))
 | 
				
			||||||
 | 
							goto done;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						lruvec = get_lruvec(memcg, nid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (swappiness < 0)
 | 
				
			||||||
 | 
							swappiness = get_swappiness(lruvec, sc);
 | 
				
			||||||
 | 
						else if (swappiness > 200)
 | 
				
			||||||
 | 
							goto done;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						switch (cmd) {
 | 
				
			||||||
 | 
						case '+':
 | 
				
			||||||
 | 
							err = run_aging(lruvec, seq, sc, swappiness, opt);
 | 
				
			||||||
 | 
							break;
 | 
				
			||||||
 | 
						case '-':
 | 
				
			||||||
 | 
							err = run_eviction(lruvec, seq, sc, swappiness, opt);
 | 
				
			||||||
 | 
							break;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					done:
 | 
				
			||||||
 | 
						mem_cgroup_put(memcg);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return err;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * ->write() handler of the read-write debugfs file.
					 *
					 * Copies the user buffer, splits it on ',', ';' and '\n', and runs each
					 * non-empty piece as a command of the form
					 *
					 *     <cmd> <memcg_id> <nid> <seq> [<swappiness> [<opt>]]
					 *
					 * via run_cmd(). Omitted optional fields are passed as -1. Processing stops
					 * at the first malformed or failing command.
					 *
					 * Returns @len on success or a negative errno: -ENOMEM, -EFAULT, -EINVAL
					 * for a malformed command (also when no command was found at all), or
					 * whatever run_cmd() returned.
					 */
					static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
									 size_t len, loff_t *pos)
					{
						void *buf;
						char *token, *rest;
						unsigned int flags;
						struct blk_plug plug;
						int err = -EINVAL;
						struct scan_control sc = {
							.may_writepage = true,
							.may_unmap = true,
							.may_swap = true,
							.reclaim_idx = MAX_NR_ZONES - 1,
							.gfp_mask = GFP_KERNEL,
						};

						/* one extra byte for the terminating NUL */
						buf = kvmalloc(len + 1, GFP_KERNEL);
						if (!buf)
							return -ENOMEM;

						if (copy_from_user(buf, src, len)) {
							err = -EFAULT;
							goto out_free;
						}

						set_task_reclaim_state(current, &sc.reclaim_state);
						flags = memalloc_noreclaim_save();
						blk_start_plug(&plug);
						if (!set_mm_walk(NULL)) {
							err = -ENOMEM;
							goto out_restore;
						}

						rest = buf;
						rest[len] = '\0';

						for (token = strsep(&rest, ",;\n"); token; token = strsep(&rest, ",;\n")) {
							int nr_parsed;
							int end;
							char cmd;
							unsigned int memcg_id;
							unsigned int nid;
							unsigned long seq;
							unsigned int swappiness = -1;
							unsigned long opt = -1;

							token = skip_spaces(token);
							if (!*token)
								continue;

							/* end is written only once the four mandatory fields have matched */
							nr_parsed = sscanf(token, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id,
									   &nid, &seq, &end, &swappiness, &end, &opt, &end);
							if (nr_parsed < 4 || token[end]) {
								err = -EINVAL;
								break;
							}

							err = run_cmd(cmd, memcg_id, nid, seq, &sc, swappiness, opt);
							if (err)
								break;
						}
					out_restore:
						clear_mm_walk();
						blk_finish_plug(&plug);
						memalloc_noreclaim_restore(flags);
						set_task_reclaim_state(current, NULL);
					out_free:
						kvfree(buf);

						if (err)
							return err;

						return len;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int lru_gen_seq_open(struct inode *inode, struct file *file)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return seq_open(file, &lru_gen_seq_ops);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static const struct file_operations lru_gen_rw_fops = {
 | 
				
			||||||
 | 
						.open = lru_gen_seq_open,
 | 
				
			||||||
 | 
						.read = seq_read,
 | 
				
			||||||
 | 
						.write = lru_gen_seq_write,
 | 
				
			||||||
 | 
						.llseek = seq_lseek,
 | 
				
			||||||
 | 
						.release = seq_release,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static const struct file_operations lru_gen_ro_fops = {
 | 
				
			||||||
 | 
						.open = lru_gen_seq_open,
 | 
				
			||||||
 | 
						.read = seq_read,
 | 
				
			||||||
 | 
						.llseek = seq_lseek,
 | 
				
			||||||
 | 
						.release = seq_release,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/******************************************************************************
 | 
					/******************************************************************************
 | 
				
			||||||
 *                          initialization
 | 
					 *                          initialization
 | 
				
			||||||
 ******************************************************************************/
 | 
					 ******************************************************************************/
 | 
				
			||||||
| 
						 | 
					@ -5409,6 +5797,9 @@ static int __init init_lru_gen(void)
 | 
				
			||||||
	if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
 | 
						if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
 | 
				
			||||||
		pr_err("lru_gen: failed to create sysfs group\n");
 | 
							pr_err("lru_gen: failed to create sysfs group\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops);
 | 
				
			||||||
 | 
						debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
late_initcall(init_lru_gen);
 | 
					late_initcall(init_lru_gen);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue