	mm/page_alloc: add page->buddy_list and page->pcp_list
Patch series "Drain remote per-cpu directly", v5. Some setups, notably NOHZ_FULL CPUs, may be running realtime or latency-sensitive applications that cannot tolerate interference due to per-cpu drain work queued by __drain_all_pages(). Introduce a new mechanism to remotely drain the per-cpu lists. It is made possible by remotely locking 'struct per_cpu_pages' new per-cpu spinlocks. This has two advantages, the time to drain is more predictable and other unrelated tasks are not interrupted. This series has the same intent as Nicolas' series "mm/page_alloc: Remote per-cpu lists drain support" -- avoid interference of a high priority task due to a workqueue item draining per-cpu page lists. While many workloads can tolerate a brief interruption, it may cause a real-time task running on a NOHZ_FULL CPU to miss a deadline and at minimum, the draining is non-deterministic. Currently an IRQ-safe local_lock protects the page allocator per-cpu lists. The local_lock on its own prevents migration and the IRQ disabling protects from corruption due to an interrupt arriving while a page allocation is in progress. This series adjusts the locking. A spinlock is added to struct per_cpu_pages to protect the list contents while local_lock_irq is ultimately replaced by just the spinlock in the final patch. This allows a remote CPU to safely. Follow-on work should allow the spin_lock_irqsave to be converted to spin_lock to avoid IRQs being disabled/enabled in most cases. The follow-on patch will be one kernel release later as it is relatively high risk and it'll make bisections more clear if there are any problems. Patch 1 is a cosmetic patch to clarify when page->lru is storing buddy pages and when it is storing per-cpu pages. Patch 2 shrinks per_cpu_pages to make room for a spin lock. Strictly speaking this is not necessary but it avoids per_cpu_pages consuming another cache line. Patch 3 is a preparation patch to avoid code duplication. Patch 4 is a minor correction. Patch 5 uses a spin_lock to protect the per_cpu_pages contents while still relying on local_lock to prevent migration, stabilise the pcp lookup and prevent IRQ reentrancy. Patch 6 remote drains per-cpu pages directly instead of using a workqueue. Patch 7 uses a normal spinlock instead of local_lock for remote draining This patch (of 7): The page allocator uses page->lru for storing pages on either buddy or PCP lists. Create page->buddy_list and page->pcp_list as a union with page->lru. This is simply to clarify what type of list a page is on in the page allocator. No functional change intended. [minchan@kernel.org: fix page lru fields in macros] Link: https://lkml.kernel.org/r/20220624125423.6126-2-mgorman@techsingularity.net Signed-off-by: Mel Gorman <mgorman@techsingularity.net> Tested-by: Minchan Kim <minchan@kernel.org> Acked-by: Minchan Kim <minchan@kernel.org> Reviewed-by: Nicolas Saenz Julienne <nsaenzju@redhat.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Tested-by: Yu Zhao <yuzhao@google.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Hugh Dickins <hughd@google.com> Cc: Marek Szyprowski <m.szyprowski@samsung.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent bcd51a3c67
commit bf75f20056

2 changed files with 17 additions and 12 deletions
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -87,6 +87,7 @@ struct page {
 			 */
 			union {
 				struct list_head lru;
+
 				/* Or, for the Unevictable "LRU list" slot */
 				struct {
 					/* Always even, to negate PageTail */
@@ -94,6 +95,10 @@ struct page {
 					/* Count page's or folio's mlocks */
 					unsigned int mlock_count;
 				};
+
+				/* Or, free page */
+				struct list_head buddy_list;
+				struct list_head pcp_list;
 			};
 			/* See page-flags.h for PAGE_MAPPING_FLAGS */
 			struct address_space *mapping;
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -793,7 +793,7 @@ static inline bool set_page_guard(struct zone *zone, struct page *page,
 		return false;
 
 	__SetPageGuard(page);
-	INIT_LIST_HEAD(&page->lru);
+	INIT_LIST_HEAD(&page->buddy_list);
 	set_page_private(page, order);
 	/* Guard pages are not available for any usage */
 	__mod_zone_freepage_state(zone, -(1 << order), migratetype);
@@ -936,7 +936,7 @@ static inline void add_to_free_list(struct page *page, struct zone *zone,
 {
 	struct free_area *area = &zone->free_area[order];
 
-	list_add(&page->lru, &area->free_list[migratetype]);
+	list_add(&page->buddy_list, &area->free_list[migratetype]);
 	area->nr_free++;
 }
 
@@ -946,7 +946,7 @@ static inline void add_to_free_list_tail(struct page *page, struct zone *zone,
 {
 	struct free_area *area = &zone->free_area[order];
 
-	list_add_tail(&page->lru, &area->free_list[migratetype]);
+	list_add_tail(&page->buddy_list, &area->free_list[migratetype]);
 	area->nr_free++;
 }
 
@@ -960,7 +960,7 @@ static inline void move_to_free_list(struct page *page, struct zone *zone,
 {
 	struct free_area *area = &zone->free_area[order];
 
-	list_move_tail(&page->lru, &area->free_list[migratetype]);
+	list_move_tail(&page->buddy_list, &area->free_list[migratetype]);
 }
 
 static inline void del_page_from_free_list(struct page *page, struct zone *zone,
@@ -970,7 +970,7 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone,
 	if (page_reported(page))
 		__ClearPageReported(page);
 
-	list_del(&page->lru);
+	list_del(&page->buddy_list);
 	__ClearPageBuddy(page);
 	set_page_private(page, 0);
 	zone->free_area[order].nr_free--;
@@ -1508,11 +1508,11 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 		do {
 			int mt;
 
-			page = list_last_entry(list, struct page, lru);
+			page = list_last_entry(list, struct page, pcp_list);
 			mt = get_pcppage_migratetype(page);
 
 			/* must delete to avoid corrupting pcp list */
-			list_del(&page->lru);
+			list_del(&page->pcp_list);
 			count -= nr_pages;
 			pcp->count -= nr_pages;
 
@@ -3072,7 +3072,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 		 * for IO devices that can merge IO requests if the physical
 		 * pages are ordered properly.
 		 */
-		list_add_tail(&page->lru, list);
+		list_add_tail(&page->pcp_list, list);
 		allocated++;
 		if (is_migrate_cma(get_pcppage_migratetype(page)))
 			__mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
@@ -3322,7 +3322,7 @@ void mark_free_pages(struct zone *zone)
 
 	for_each_migratetype_order(order, t) {
 		list_for_each_entry(page,
-				&zone->free_area[order].free_list[t], lru) {
+				&zone->free_area[order].free_list[t], buddy_list) {
 			unsigned long i;
 
 			pfn = page_to_pfn(page);
@@ -3411,7 +3411,7 @@ static void free_unref_page_commit(struct page *page, int migratetype,
 	__count_vm_event(PGFREE);
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	pindex = order_to_pindex(migratetype, order);
-	list_add(&page->lru, &pcp->lists[pindex]);
+	list_add(&page->pcp_list, &pcp->lists[pindex]);
 	pcp->count += 1 << order;
 
 	/*
@@ -3674,8 +3674,8 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 				return NULL;
 		}
 
-		page = list_first_entry(list, struct page, lru);
-		list_del(&page->lru);
+		page = list_first_entry(list, struct page, pcp_list);
+		list_del(&page->pcp_list);
 		pcp->count -= 1 << order;
 	} while (check_new_pcp(page, order));
 
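For context on where the series is headed (patches 5-7 in the cover letter above), the following rough userspace sketch illustrates the remote-drain idea: give each per-CPU list its own spinlock so another CPU can empty it directly, rather than queueing drain work on the owning CPU. All names here (struct pcp_lists, drain_remote) are invented for illustration and are not the kernel API:

	#include <pthread.h>
	#include <stdio.h>

	#define NR_CPUS 4

	/* Toy stand-in for struct per_cpu_pages: a count guarded by its own lock. */
	struct pcp_lists {
		pthread_spinlock_t lock;
		int count;
	};

	static struct pcp_lists pcp[NR_CPUS];

	/*
	 * Remote drain: take the target CPU's lock and empty its list
	 * directly. No IPI or workqueue item ever runs on the target CPU.
	 */
	static void drain_remote(int cpu)
	{
		pthread_spin_lock(&pcp[cpu].lock);
		pcp[cpu].count = 0;
		pthread_spin_unlock(&pcp[cpu].lock);
	}

	int main(void)
	{
		int i;

		for (i = 0; i < NR_CPUS; i++) {
			pthread_spin_init(&pcp[i].lock, PTHREAD_PROCESS_PRIVATE);
			pcp[i].count = 42;	/* pretend some pages are cached */
		}

		/* One CPU drains every other CPU's list without their involvement. */
		for (i = 0; i < NR_CPUS; i++)
			drain_remote(i);

		for (i = 0; i < NR_CPUS; i++)
			printf("cpu%d count=%d\n", i, pcp[i].count);

		return 0;
	}

Even in this toy, the two advantages the cover letter claims are visible: the drain runs entirely on the calling CPU, so a pinned NOHZ_FULL task on the target CPU is never interrupted, and drain latency depends only on lock hold times rather than on when the remote CPU can run a work item.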