mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	This patch moves the per-node lru_lock into lruvec, giving each memcg its own lru_lock on every node. On a large machine, memcgs therefore no longer have to contend on the per-node pgdat->lru_lock; each can run fast under its own lru_lock. Now that the memcg charge has been moved ahead of LRU insertion, page isolation can serialize a page's memcg, so the per-memcg lruvec lock is stable and can replace the per-node lru lock. In isolate_migratepages_block(), compact_unlock_should_abort and lock_page_lruvec_irqsave are open coded to work with compact_control. Also add a debug function to the locking code, which may give some clues if something gets out of hand. Daniel Jordan's testing shows a 62% improvement on a modified readtwice case on his 2P * 10 core * 2 HT Broadwell box. https://lore.kernel.org/lkml/20200915165807.kpp7uhiw7l3loofu@ca-dmjordan1.us.oracle.com/ Hugh Dickins helped polish the patch, thanks! [alex.shi@linux.alibaba.com: fix comment typo] Link: https://lkml.kernel.org/r/5b085715-292a-4b43-50b3-d73dc90d1de5@linux.alibaba.com [alex.shi@linux.alibaba.com: use page_memcg()] Link: https://lkml.kernel.org/r/5a4c2b72-7ee8-2478-fc0e-85eb83aafec4@linux.alibaba.com Link: https://lkml.kernel.org/r/1604566549-62481-18-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com> Acked-by: Hugh Dickins <hughd@google.com> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Rong Chen <rong.a.chen@intel.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> Cc: Yang Shi <yang.shi@linux.alibaba.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru> Cc: Daniel Jordan <daniel.m.jordan@oracle.com> Cc: Alexander Duyck <alexander.duyck@gmail.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> Cc: "Huang, Ying" <ying.huang@intel.com> Cc: Jann Horn <jannh@google.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Kirill A. Shutemov <kirill@shutemov.name> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Michal Hocko <mhocko@suse.com> Cc: Mika Penttilä <mika.penttila@nextfour.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Shakeel Butt <shakeelb@google.com> Cc: Tejun Heo <tj@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Wei Yang <richard.weiyang@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
			
				
	
	
		
			102 lines
		
	
	
	
		
			2.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			102 lines
		
	
	
	
		
			2.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/mm/mmzone.c
 *
 * Management code for pgdats, zones and page flags
 */
 | 
						|
 | 
						|
 | 
						|
#include <linux/compiler.h>
#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
 | 
						|
 | 
						|
struct pglist_data *first_online_pgdat(void)
 | 
						|
{
 | 
						|
	return NODE_DATA(first_online_node);
 | 
						|
}
 | 
						|
 | 
						|
struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
 | 
						|
{
 | 
						|
	int nid = next_online_node(pgdat->node_id);
 | 
						|
 | 
						|
	if (nid == MAX_NUMNODES)
 | 
						|
		return NULL;
 | 
						|
	return NODE_DATA(nid);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * next_zone - helper magic for for_each_zone()
 | 
						|
 */
 | 
						|
struct zone *next_zone(struct zone *zone)
 | 
						|
{
 | 
						|
	pg_data_t *pgdat = zone->zone_pgdat;
 | 
						|
 | 
						|
	if (zone < pgdat->node_zones + MAX_NR_ZONES - 1)
 | 
						|
		zone++;
 | 
						|
	else {
 | 
						|
		pgdat = next_online_pgdat(pgdat);
 | 
						|
		if (pgdat)
 | 
						|
			zone = pgdat->node_zones;
 | 
						|
		else
 | 
						|
			zone = NULL;
 | 
						|
	}
 | 
						|
	return zone;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Test whether the node index of @zref is set in @nodes.
 *
 * Without CONFIG_NUMA the nodemask is not consulted and the answer is
 * always 1.
 */
static inline int zref_in_nodemask(struct zoneref *zref, nodemask_t *nodes)
{
#ifndef CONFIG_NUMA
	return 1;
#else
	return node_isset(zonelist_node_idx(zref), *nodes);
#endif /* !CONFIG_NUMA */
}
 | 
						|
 | 
						|
/* Returns the next zone at or below highest_zoneidx in a zonelist */
 | 
						|
struct zoneref *__next_zones_zonelist(struct zoneref *z,
 | 
						|
					enum zone_type highest_zoneidx,
 | 
						|
					nodemask_t *nodes)
 | 
						|
{
 | 
						|
	/*
 | 
						|
	 * Find the next suitable zone to use for the allocation.
 | 
						|
	 * Only filter based on nodemask if it's set
 | 
						|
	 */
 | 
						|
	if (unlikely(nodes == NULL))
 | 
						|
		while (zonelist_zone_idx(z) > highest_zoneidx)
 | 
						|
			z++;
 | 
						|
	else
 | 
						|
		while (zonelist_zone_idx(z) > highest_zoneidx ||
 | 
						|
				(z->zone && !zref_in_nodemask(z, nodes)))
 | 
						|
			z++;
 | 
						|
 | 
						|
	return z;
 | 
						|
}
 | 
						|
 | 
						|
void lruvec_init(struct lruvec *lruvec)
 | 
						|
{
 | 
						|
	enum lru_list lru;
 | 
						|
 | 
						|
	memset(lruvec, 0, sizeof(struct lruvec));
 | 
						|
	spin_lock_init(&lruvec->lru_lock);
 | 
						|
 | 
						|
	for_each_lru(lru)
 | 
						|
		INIT_LIST_HEAD(&lruvec->lists[lru]);
 | 
						|
}
 | 
						|
 | 
						|
#if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS)
/*
 * page_cpupid_xchg_last - atomically swap the cpupid field in page->flags
 * @page:   page whose flags word is updated
 * @cpupid: new value; only the bits covered by LAST_CPUPID_MASK are stored
 *
 * Replaces the LAST_CPUPID field of @page->flags with @cpupid using a
 * cmpxchg() retry loop, leaving all other flag bits as they were in the
 * snapshot that the cmpxchg() validated.
 *
 * Returns the cpupid that was stored in the field before the update.
 */
int page_cpupid_xchg_last(struct page *page, int cpupid)
{
	unsigned long old_flags, flags;
	int last_cpupid;

	do {
		/*
		 * Take a single snapshot of the flags word.  READ_ONCE() keeps
		 * the compiler from re-reading page->flags behind our back.
		 */
		old_flags = flags = READ_ONCE(page->flags);

		/*
		 * Extract the previous cpupid from that same snapshot rather
		 * than reading page->flags a second time: the value returned
		 * must be the one the successful cmpxchg() actually replaced.
		 */
		last_cpupid = (old_flags >> LAST_CPUPID_PGSHIFT) &
			      LAST_CPUPID_MASK;

		flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT);
		flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT;
	} while (unlikely(cmpxchg(&page->flags, old_flags, flags) != old_flags));

	return last_cpupid;
}
#endif
 |