forked from mirrors/linux
		
	mm/page_alloc: convert per-cpu list protection to local_lock
There is a lack of clarity of what exactly local_irq_save/local_irq_restore protects in page_alloc.c. It conflates the protection of per-cpu page allocation structures with per-cpu vmstat deltas. This patch protects the PCP structure using local_lock, which for most configurations is identical to IRQ enabling/disabling. The scope of the lock is still wider than it should be but this is decreased later. It is possible for the local_lock to be embedded safely within struct per_cpu_pages but it adds complexity to free_unref_page_list. [akpm@linux-foundation.org: coding style fixes] [mgorman@techsingularity.net: work around a pahole limitation with zero-sized struct pagesets] Link: https://lkml.kernel.org/r/20210526080741.GW30378@techsingularity.net [lkp@intel.com: Make pagesets static] Link: https://lkml.kernel.org/r/20210512095458.30632-3-mgorman@techsingularity.net Signed-off-by: Mel Gorman <mgorman@techsingularity.net> Acked-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Chuck Lever <chuck.lever@oracle.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jesper Dangaard Brouer <brouer@redhat.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									28f836b677
								
							
						
					
					
						commit
						dbbee9d5cd
					
				
					 3 changed files with 51 additions and 15 deletions
				
			
		|  | @ -20,6 +20,7 @@ | ||||||
| #include <linux/atomic.h> | #include <linux/atomic.h> | ||||||
| #include <linux/mm_types.h> | #include <linux/mm_types.h> | ||||||
| #include <linux/page-flags.h> | #include <linux/page-flags.h> | ||||||
|  | #include <linux/local_lock.h> | ||||||
| #include <asm/page.h> | #include <asm/page.h> | ||||||
| 
 | 
 | ||||||
| /* Free memory management - zoned buddy allocator.  */ | /* Free memory management - zoned buddy allocator.  */ | ||||||
|  | @ -337,6 +338,7 @@ enum zone_watermarks { | ||||||
| #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) | #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) | ||||||
| #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) | #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) | ||||||
| 
 | 
 | ||||||
|  | /* Fields and list protected by pagesets local_lock in page_alloc.c */ | ||||||
| struct per_cpu_pages { | struct per_cpu_pages { | ||||||
| 	int count;		/* number of pages in the list */ | 	int count;		/* number of pages in the list */ | ||||||
| 	int high;		/* high watermark, emptying needed */ | 	int high;		/* high watermark, emptying needed */ | ||||||
|  |  | ||||||
|  | @ -313,6 +313,9 @@ config DEBUG_INFO_BTF | ||||||
| config PAHOLE_HAS_SPLIT_BTF | config PAHOLE_HAS_SPLIT_BTF | ||||||
| 	def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119") | 	def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119") | ||||||
| 
 | 
 | ||||||
|  | config PAHOLE_HAS_ZEROSIZE_PERCPU_SUPPORT | ||||||
|  | 	def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "122") | ||||||
|  | 
 | ||||||
| config DEBUG_INFO_BTF_MODULES | config DEBUG_INFO_BTF_MODULES | ||||||
| 	def_bool y | 	def_bool y | ||||||
| 	depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF | 	depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF | ||||||
|  |  | ||||||
|  | @ -122,6 +122,24 @@ typedef int __bitwise fpi_t; | ||||||
| static DEFINE_MUTEX(pcp_batch_high_lock); | static DEFINE_MUTEX(pcp_batch_high_lock); | ||||||
| #define MIN_PERCPU_PAGELIST_FRACTION	(8) | #define MIN_PERCPU_PAGELIST_FRACTION	(8) | ||||||
| 
 | 
 | ||||||
|  | struct pagesets { | ||||||
|  | 	local_lock_t lock; | ||||||
|  | #if defined(CONFIG_DEBUG_INFO_BTF) &&				\ | ||||||
|  | 	!defined(CONFIG_DEBUG_LOCK_ALLOC) &&			\ | ||||||
|  | 	!defined(CONFIG_PAHOLE_HAS_ZEROSIZE_PERCPU_SUPPORT) | ||||||
|  | 	/*
 | ||||||
|  | 	 * pahole 1.21 and earlier gets confused by zero-sized per-CPU | ||||||
|  | 	 * variables and produces invalid BTF. Ensure that | ||||||
|  | 	 * sizeof(struct pagesets) != 0 for older versions of pahole. | ||||||
|  | 	 */ | ||||||
|  | 	char __pahole_hack; | ||||||
|  | 	#warning "pahole too old to support zero-sized struct pagesets" | ||||||
|  | #endif | ||||||
|  | }; | ||||||
|  | static DEFINE_PER_CPU(struct pagesets, pagesets) = { | ||||||
|  | 	.lock = INIT_LOCAL_LOCK(lock), | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID | #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID | ||||||
| DEFINE_PER_CPU(int, numa_node); | DEFINE_PER_CPU(int, numa_node); | ||||||
| EXPORT_PER_CPU_SYMBOL(numa_node); | EXPORT_PER_CPU_SYMBOL(numa_node); | ||||||
|  | @ -1453,6 +1471,10 @@ static void free_pcppages_bulk(struct zone *zone, int count, | ||||||
| 		} while (--count && --batch_free && !list_empty(list)); | 		} while (--count && --batch_free && !list_empty(list)); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * local_lock_irq held so equivalent to spin_lock_irqsave for | ||||||
|  | 	 * both PREEMPT_RT and non-PREEMPT_RT configurations. | ||||||
|  | 	 */ | ||||||
| 	spin_lock(&zone->lock); | 	spin_lock(&zone->lock); | ||||||
| 	isolated_pageblocks = has_isolate_pageblock(zone); | 	isolated_pageblocks = has_isolate_pageblock(zone); | ||||||
| 
 | 
 | ||||||
|  | @ -1573,6 +1595,11 @@ static void __free_pages_ok(struct page *page, unsigned int order, | ||||||
| 		return; | 		return; | ||||||
| 
 | 
 | ||||||
| 	migratetype = get_pfnblock_migratetype(page, pfn); | 	migratetype = get_pfnblock_migratetype(page, pfn); | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * TODO FIX: Disable IRQs before acquiring IRQ-safe zone->lock | ||||||
|  | 	 * and protect vmstat updates. | ||||||
|  | 	 */ | ||||||
| 	local_irq_save(flags); | 	local_irq_save(flags); | ||||||
| 	__count_vm_events(PGFREE, 1 << order); | 	__count_vm_events(PGFREE, 1 << order); | ||||||
| 	free_one_page(page_zone(page), page, pfn, order, migratetype, | 	free_one_page(page_zone(page), page, pfn, order, migratetype, | ||||||
|  | @ -2955,6 +2982,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | ||||||
| { | { | ||||||
| 	int i, allocated = 0; | 	int i, allocated = 0; | ||||||
| 
 | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * local_lock_irq held so equivalent to spin_lock_irqsave for | ||||||
|  | 	 * both PREEMPT_RT and non-PREEMPT_RT configurations. | ||||||
|  | 	 */ | ||||||
| 	spin_lock(&zone->lock); | 	spin_lock(&zone->lock); | ||||||
| 	for (i = 0; i < count; ++i) { | 	for (i = 0; i < count; ++i) { | ||||||
| 		struct page *page = __rmqueue(zone, order, migratetype, | 		struct page *page = __rmqueue(zone, order, migratetype, | ||||||
|  | @ -3007,12 +3038,12 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) | ||||||
| 	unsigned long flags; | 	unsigned long flags; | ||||||
| 	int to_drain, batch; | 	int to_drain, batch; | ||||||
| 
 | 
 | ||||||
| 	local_irq_save(flags); | 	local_lock_irqsave(&pagesets.lock, flags); | ||||||
| 	batch = READ_ONCE(pcp->batch); | 	batch = READ_ONCE(pcp->batch); | ||||||
| 	to_drain = min(pcp->count, batch); | 	to_drain = min(pcp->count, batch); | ||||||
| 	if (to_drain > 0) | 	if (to_drain > 0) | ||||||
| 		free_pcppages_bulk(zone, to_drain, pcp); | 		free_pcppages_bulk(zone, to_drain, pcp); | ||||||
| 	local_irq_restore(flags); | 	local_unlock_irqrestore(&pagesets.lock, flags); | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | @ -3028,13 +3059,13 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) | ||||||
| 	unsigned long flags; | 	unsigned long flags; | ||||||
| 	struct per_cpu_pages *pcp; | 	struct per_cpu_pages *pcp; | ||||||
| 
 | 
 | ||||||
| 	local_irq_save(flags); | 	local_lock_irqsave(&pagesets.lock, flags); | ||||||
| 
 | 
 | ||||||
| 	pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); | 	pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); | ||||||
| 	if (pcp->count) | 	if (pcp->count) | ||||||
| 		free_pcppages_bulk(zone, pcp->count, pcp); | 		free_pcppages_bulk(zone, pcp->count, pcp); | ||||||
| 
 | 
 | ||||||
| 	local_irq_restore(flags); | 	local_unlock_irqrestore(&pagesets.lock, flags); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  | @ -3297,9 +3328,9 @@ void free_unref_page(struct page *page) | ||||||
| 	if (!free_unref_page_prepare(page, pfn)) | 	if (!free_unref_page_prepare(page, pfn)) | ||||||
| 		return; | 		return; | ||||||
| 
 | 
 | ||||||
| 	local_irq_save(flags); | 	local_lock_irqsave(&pagesets.lock, flags); | ||||||
| 	free_unref_page_commit(page, pfn); | 	free_unref_page_commit(page, pfn); | ||||||
| 	local_irq_restore(flags); | 	local_unlock_irqrestore(&pagesets.lock, flags); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  | @ -3319,7 +3350,7 @@ void free_unref_page_list(struct list_head *list) | ||||||
| 		set_page_private(page, pfn); | 		set_page_private(page, pfn); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	local_irq_save(flags); | 	local_lock_irqsave(&pagesets.lock, flags); | ||||||
| 	list_for_each_entry_safe(page, next, list, lru) { | 	list_for_each_entry_safe(page, next, list, lru) { | ||||||
| 		unsigned long pfn = page_private(page); | 		unsigned long pfn = page_private(page); | ||||||
| 
 | 
 | ||||||
|  | @ -3332,12 +3363,12 @@ void free_unref_page_list(struct list_head *list) | ||||||
| 		 * a large list of pages to free. | 		 * a large list of pages to free. | ||||||
| 		 */ | 		 */ | ||||||
| 		if (++batch_count == SWAP_CLUSTER_MAX) { | 		if (++batch_count == SWAP_CLUSTER_MAX) { | ||||||
| 			local_irq_restore(flags); | 			local_unlock_irqrestore(&pagesets.lock, flags); | ||||||
| 			batch_count = 0; | 			batch_count = 0; | ||||||
| 			local_irq_save(flags); | 			local_lock_irqsave(&pagesets.lock, flags); | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	local_irq_restore(flags); | 	local_unlock_irqrestore(&pagesets.lock, flags); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  | @ -3494,7 +3525,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, | ||||||
| 	struct page *page; | 	struct page *page; | ||||||
| 	unsigned long flags; | 	unsigned long flags; | ||||||
| 
 | 
 | ||||||
| 	local_irq_save(flags); | 	local_lock_irqsave(&pagesets.lock, flags); | ||||||
| 	pcp = this_cpu_ptr(zone->per_cpu_pageset); | 	pcp = this_cpu_ptr(zone->per_cpu_pageset); | ||||||
| 	list = &pcp->lists[migratetype]; | 	list = &pcp->lists[migratetype]; | ||||||
| 	page = __rmqueue_pcplist(zone,  migratetype, alloc_flags, pcp, list); | 	page = __rmqueue_pcplist(zone,  migratetype, alloc_flags, pcp, list); | ||||||
|  | @ -3502,7 +3533,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, | ||||||
| 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1); | 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1); | ||||||
| 		zone_statistics(preferred_zone, zone); | 		zone_statistics(preferred_zone, zone); | ||||||
| 	} | 	} | ||||||
| 	local_irq_restore(flags); | 	local_unlock_irqrestore(&pagesets.lock, flags); | ||||||
| 	return page; | 	return page; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -5103,7 +5134,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, | ||||||
| 		goto failed; | 		goto failed; | ||||||
| 
 | 
 | ||||||
| 	/* Attempt the batch allocation */ | 	/* Attempt the batch allocation */ | ||||||
| 	local_irq_save(flags); | 	local_lock_irqsave(&pagesets.lock, flags); | ||||||
| 	pcp = this_cpu_ptr(zone->per_cpu_pageset); | 	pcp = this_cpu_ptr(zone->per_cpu_pageset); | ||||||
| 	pcp_list = &pcp->lists[ac.migratetype]; | 	pcp_list = &pcp->lists[ac.migratetype]; | ||||||
| 
 | 
 | ||||||
|  | @ -5141,12 +5172,12 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, | ||||||
| 		nr_populated++; | 		nr_populated++; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	local_irq_restore(flags); | 	local_unlock_irqrestore(&pagesets.lock, flags); | ||||||
| 
 | 
 | ||||||
| 	return nr_populated; | 	return nr_populated; | ||||||
| 
 | 
 | ||||||
| failed_irq: | failed_irq: | ||||||
| 	local_irq_restore(flags); | 	local_unlock_irqrestore(&pagesets.lock, flags); | ||||||
| 
 | 
 | ||||||
| failed: | failed: | ||||||
| 	page = __alloc_pages(gfp, 0, preferred_nid, nodemask); | 	page = __alloc_pages(gfp, 0, preferred_nid, nodemask); | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Mel Gorman
						Mel Gorman