Mirror of https://github.com/torvalds/linux.git (synced 2025-11-04 10:40:15 +02:00)
			
		
		
		
	mm: multi-gen LRU: use {ptep,pmdp}_clear_young_notify()
When the MM_WALK capability is enabled, memory that is mostly accessed by
a VM appears younger than it really is, so this memory is less likely to
be evicted.  As a result, the presence of a running VM can significantly
increase swap-outs for non-VM memory, regressing the performance of the
rest of the system.
Fix this regression by always calling {ptep,pmdp}_clear_young_notify()
whenever we clear the young bits on PMDs/PTEs.
[jthoughton@google.com: fix link-time error]
Link: https://lkml.kernel.org/r/20241019012940.3656292-3-jthoughton@google.com
Fixes: bd74fdaea1 ("mm: multi-gen LRU: support page table walks")
Signed-off-by: Yu Zhao <yuzhao@google.com>
Signed-off-by: James Houghton <jthoughton@google.com>
Reported-by: David Stevens <stevensd@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: David Matlack <dmatlack@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Oliver Upton <oliver.upton@linux.dev>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Wei Xu <weixugc@google.com>
Cc: <stable@vger.kernel.org>
Cc: kernel test robot <lkp@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
			
			
This commit is contained in:
		
							parent
							
								
									ddd6d8e975
								
							
						
					
					
						commit
						1d4832becd
					
				
					 3 changed files with 55 additions and 47 deletions
				
			
		| 
						 | 
				
			
			@ -555,7 +555,7 @@ struct lru_gen_memcg {
 | 
			
		|||
 | 
			
		||||
void lru_gen_init_pgdat(struct pglist_data *pgdat);
 | 
			
		||||
void lru_gen_init_lruvec(struct lruvec *lruvec);
 | 
			
		||||
void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
 | 
			
		||||
bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
 | 
			
		||||
 | 
			
		||||
void lru_gen_init_memcg(struct mem_cgroup *memcg);
 | 
			
		||||
void lru_gen_exit_memcg(struct mem_cgroup *memcg);
 | 
			
		||||
| 
						 | 
				
			
			@ -574,8 +574,9 @@ static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
 | 
			
		|||
{
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 | 
			
		||||
static inline bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 | 
			
		||||
{
 | 
			
		||||
	return false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -885,13 +885,10 @@ static bool folio_referenced_one(struct folio *folio,
 | 
			
		|||
			return false;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (pvmw.pte) {
 | 
			
		||||
			if (lru_gen_enabled() &&
 | 
			
		||||
			    pte_young(ptep_get(pvmw.pte))) {
 | 
			
		||||
				lru_gen_look_around(&pvmw);
 | 
			
		||||
		if (lru_gen_enabled() && pvmw.pte) {
 | 
			
		||||
			if (lru_gen_look_around(&pvmw))
 | 
			
		||||
				referenced++;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
		} else if (pvmw.pte) {
 | 
			
		||||
			if (ptep_clear_flush_young_notify(vma, address,
 | 
			
		||||
						pvmw.pte))
 | 
			
		||||
				referenced++;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										86
									
								
								mm/vmscan.c
									
									
									
									
									
								
							
							
						
						
									
										86
									
								
								mm/vmscan.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -56,6 +56,7 @@
 | 
			
		|||
#include <linux/khugepaged.h>
 | 
			
		||||
#include <linux/rculist_nulls.h>
 | 
			
		||||
#include <linux/random.h>
 | 
			
		||||
#include <linux/mmu_notifier.h>
 | 
			
		||||
 | 
			
		||||
#include <asm/tlbflush.h>
 | 
			
		||||
#include <asm/div64.h>
 | 
			
		||||
| 
						 | 
				
			
			@ -3294,7 +3295,8 @@ static bool get_next_vma(unsigned long mask, unsigned long size, struct mm_walk
 | 
			
		|||
	return false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr)
 | 
			
		||||
static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr,
 | 
			
		||||
				 struct pglist_data *pgdat)
 | 
			
		||||
{
 | 
			
		||||
	unsigned long pfn = pte_pfn(pte);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -3306,13 +3308,20 @@ static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned
 | 
			
		|||
	if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte)))
 | 
			
		||||
		return -1;
 | 
			
		||||
 | 
			
		||||
	if (!pte_young(pte) && !mm_has_notifiers(vma->vm_mm))
 | 
			
		||||
		return -1;
 | 
			
		||||
 | 
			
		||||
	if (WARN_ON_ONCE(!pfn_valid(pfn)))
 | 
			
		||||
		return -1;
 | 
			
		||||
 | 
			
		||||
	if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
 | 
			
		||||
		return -1;
 | 
			
		||||
 | 
			
		||||
	return pfn;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr)
 | 
			
		||||
static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr,
 | 
			
		||||
				 struct pglist_data *pgdat)
 | 
			
		||||
{
 | 
			
		||||
	unsigned long pfn = pmd_pfn(pmd);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -3324,9 +3333,15 @@ static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned
 | 
			
		|||
	if (WARN_ON_ONCE(pmd_devmap(pmd)))
 | 
			
		||||
		return -1;
 | 
			
		||||
 | 
			
		||||
	if (!pmd_young(pmd) && !mm_has_notifiers(vma->vm_mm))
 | 
			
		||||
		return -1;
 | 
			
		||||
 | 
			
		||||
	if (WARN_ON_ONCE(!pfn_valid(pfn)))
 | 
			
		||||
		return -1;
 | 
			
		||||
 | 
			
		||||
	if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
 | 
			
		||||
		return -1;
 | 
			
		||||
 | 
			
		||||
	return pfn;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -3335,10 +3350,6 @@ static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
 | 
			
		|||
{
 | 
			
		||||
	struct folio *folio;
 | 
			
		||||
 | 
			
		||||
	/* try to avoid unnecessary memory loads */
 | 
			
		||||
	if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
 | 
			
		||||
		return NULL;
 | 
			
		||||
 | 
			
		||||
	folio = pfn_folio(pfn);
 | 
			
		||||
	if (folio_nid(folio) != pgdat->node_id)
 | 
			
		||||
		return NULL;
 | 
			
		||||
| 
						 | 
				
			
			@ -3394,20 +3405,16 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
 | 
			
		|||
		total++;
 | 
			
		||||
		walk->mm_stats[MM_LEAF_TOTAL]++;
 | 
			
		||||
 | 
			
		||||
		pfn = get_pte_pfn(ptent, args->vma, addr);
 | 
			
		||||
		pfn = get_pte_pfn(ptent, args->vma, addr, pgdat);
 | 
			
		||||
		if (pfn == -1)
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		if (!pte_young(ptent)) {
 | 
			
		||||
			continue;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
 | 
			
		||||
		if (!folio)
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		if (!ptep_test_and_clear_young(args->vma, addr, pte + i))
 | 
			
		||||
			VM_WARN_ON_ONCE(true);
 | 
			
		||||
		if (!ptep_clear_young_notify(args->vma, addr, pte + i))
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		young++;
 | 
			
		||||
		walk->mm_stats[MM_LEAF_YOUNG]++;
 | 
			
		||||
| 
						 | 
				
			
			@ -3473,21 +3480,25 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area
 | 
			
		|||
		/* don't round down the first address */
 | 
			
		||||
		addr = i ? (*first & PMD_MASK) + i * PMD_SIZE : *first;
 | 
			
		||||
 | 
			
		||||
		pfn = get_pmd_pfn(pmd[i], vma, addr);
 | 
			
		||||
		if (pfn == -1)
 | 
			
		||||
		if (!pmd_present(pmd[i]))
 | 
			
		||||
			goto next;
 | 
			
		||||
 | 
			
		||||
		if (!pmd_trans_huge(pmd[i])) {
 | 
			
		||||
			if (!walk->force_scan && should_clear_pmd_young())
 | 
			
		||||
			if (!walk->force_scan && should_clear_pmd_young() &&
 | 
			
		||||
			    !mm_has_notifiers(args->mm))
 | 
			
		||||
				pmdp_test_and_clear_young(vma, addr, pmd + i);
 | 
			
		||||
			goto next;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		pfn = get_pmd_pfn(pmd[i], vma, addr, pgdat);
 | 
			
		||||
		if (pfn == -1)
 | 
			
		||||
			goto next;
 | 
			
		||||
 | 
			
		||||
		folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
 | 
			
		||||
		if (!folio)
 | 
			
		||||
			goto next;
 | 
			
		||||
 | 
			
		||||
		if (!pmdp_test_and_clear_young(vma, addr, pmd + i))
 | 
			
		||||
		if (!pmdp_clear_young_notify(vma, addr, pmd + i))
 | 
			
		||||
			goto next;
 | 
			
		||||
 | 
			
		||||
		walk->mm_stats[MM_LEAF_YOUNG]++;
 | 
			
		||||
| 
						 | 
				
			
			@ -3545,24 +3556,18 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
 | 
			
		|||
		}
 | 
			
		||||
 | 
			
		||||
		if (pmd_trans_huge(val)) {
 | 
			
		||||
			unsigned long pfn = pmd_pfn(val);
 | 
			
		||||
			struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
 | 
			
		||||
			unsigned long pfn = get_pmd_pfn(val, vma, addr, pgdat);
 | 
			
		||||
 | 
			
		||||
			walk->mm_stats[MM_LEAF_TOTAL]++;
 | 
			
		||||
 | 
			
		||||
			if (!pmd_young(val)) {
 | 
			
		||||
				continue;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			/* try to avoid unnecessary memory loads */
 | 
			
		||||
			if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
 | 
			
		||||
				continue;
 | 
			
		||||
 | 
			
		||||
			if (pfn != -1)
 | 
			
		||||
				walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
 | 
			
		||||
			continue;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (!walk->force_scan && should_clear_pmd_young()) {
 | 
			
		||||
		if (!walk->force_scan && should_clear_pmd_young() &&
 | 
			
		||||
		    !mm_has_notifiers(args->mm)) {
 | 
			
		||||
			if (!pmd_young(val))
 | 
			
		||||
				continue;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -4036,13 +4041,13 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
 | 
			
		|||
 * the PTE table to the Bloom filter. This forms a feedback loop between the
 | 
			
		||||
 * eviction and the aging.
 | 
			
		||||
 */
 | 
			
		||||
void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 | 
			
		||||
bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 | 
			
		||||
{
 | 
			
		||||
	int i;
 | 
			
		||||
	unsigned long start;
 | 
			
		||||
	unsigned long end;
 | 
			
		||||
	struct lru_gen_mm_walk *walk;
 | 
			
		||||
	int young = 0;
 | 
			
		||||
	int young = 1;
 | 
			
		||||
	pte_t *pte = pvmw->pte;
 | 
			
		||||
	unsigned long addr = pvmw->address;
 | 
			
		||||
	struct vm_area_struct *vma = pvmw->vma;
 | 
			
		||||
| 
						 | 
				
			
			@ -4058,12 +4063,15 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 | 
			
		|||
	lockdep_assert_held(pvmw->ptl);
 | 
			
		||||
	VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio);
 | 
			
		||||
 | 
			
		||||
	if (!ptep_clear_young_notify(vma, addr, pte))
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	if (spin_is_contended(pvmw->ptl))
 | 
			
		||||
		return;
 | 
			
		||||
		return true;
 | 
			
		||||
 | 
			
		||||
	/* exclude special VMAs containing anon pages from COW */
 | 
			
		||||
	if (vma->vm_flags & VM_SPECIAL)
 | 
			
		||||
		return;
 | 
			
		||||
		return true;
 | 
			
		||||
 | 
			
		||||
	/* avoid taking the LRU lock under the PTL when possible */
 | 
			
		||||
	walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL;
 | 
			
		||||
| 
						 | 
				
			
			@ -4071,6 +4079,9 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 | 
			
		|||
	start = max(addr & PMD_MASK, vma->vm_start);
 | 
			
		||||
	end = min(addr | ~PMD_MASK, vma->vm_end - 1) + 1;
 | 
			
		||||
 | 
			
		||||
	if (end - start == PAGE_SIZE)
 | 
			
		||||
		return true;
 | 
			
		||||
 | 
			
		||||
	if (end - start > MIN_LRU_BATCH * PAGE_SIZE) {
 | 
			
		||||
		if (addr - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
 | 
			
		||||
			end = start + MIN_LRU_BATCH * PAGE_SIZE;
 | 
			
		||||
| 
						 | 
				
			
			@ -4084,7 +4095,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 | 
			
		|||
 | 
			
		||||
	/* folio_update_gen() requires stable folio_memcg() */
 | 
			
		||||
	if (!mem_cgroup_trylock_pages(memcg))
 | 
			
		||||
		return;
 | 
			
		||||
		return true;
 | 
			
		||||
 | 
			
		||||
	arch_enter_lazy_mmu_mode();
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -4094,19 +4105,16 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 | 
			
		|||
		unsigned long pfn;
 | 
			
		||||
		pte_t ptent = ptep_get(pte + i);
 | 
			
		||||
 | 
			
		||||
		pfn = get_pte_pfn(ptent, vma, addr);
 | 
			
		||||
		pfn = get_pte_pfn(ptent, vma, addr, pgdat);
 | 
			
		||||
		if (pfn == -1)
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		if (!pte_young(ptent))
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		folio = get_pfn_folio(pfn, memcg, pgdat, can_swap);
 | 
			
		||||
		if (!folio)
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		if (!ptep_test_and_clear_young(vma, addr, pte + i))
 | 
			
		||||
			VM_WARN_ON_ONCE(true);
 | 
			
		||||
		if (!ptep_clear_young_notify(vma, addr, pte + i))
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		young++;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -4136,6 +4144,8 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 | 
			
		|||
	/* feedback from rmap walkers to page table walkers */
 | 
			
		||||
	if (mm_state && suitable_to_scan(i, young))
 | 
			
		||||
		update_bloom_filter(mm_state, max_seq, pvmw->pmd);
 | 
			
		||||
 | 
			
		||||
	return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/******************************************************************************
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue