mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	mm/vmalloc: huge vmalloc backing pages should be split rather than compound
Huge vmalloc higher-order backing pages were allocated with __GFP_COMP in order to allow the sub-pages to be refcounted by callers such as "remap_vmalloc_page [sic]" (remap_vmalloc_range). However a similar problem exists for other struct page fields callers use, for example fb_deferred_io_fault() takes a vmalloc'ed page and not only refcounts it but uses ->lru, ->mapping, ->index. This is not compatible with compound sub-pages, and can cause bad page state issues like BUG: Bad page state in process swapper/0 pfn:00743 page:(____ptrval____) refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x743 flags: 0x7ffff000000000(node=0|zone=0|lastcpupid=0x7ffff) raw: 007ffff000000000 c00c00000001d0c8 c00c00000001d0c8 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: corrupted mapping in tail page Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.18.0-rc3-00082-gfc6fff4a7ce1-dirty #2810 Call Trace: dump_stack_lvl+0x74/0xa8 (unreliable) bad_page+0x12c/0x170 free_tail_pages_check+0xe8/0x190 free_pcp_prepare+0x31c/0x4e0 free_unref_page+0x40/0x1b0 __vunmap+0x1d8/0x420 ... The correct approach is to use split high-order pages for the huge vmalloc backing. These allow callers to treat them in exactly the same way as individually-allocated order-0 pages. Link: https://lore.kernel.org/all/14444103-d51b-0fb3-ee63-c3f182f0b546@molgen.mpg.de/ Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Cc: Paul Menzel <pmenzel@molgen.mpg.de> Cc: Song Liu <songliubraving@fb.com> Cc: Rick Edgecombe <rick.p.edgecombe@intel.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									d569e86915
								
							
						
					
					
						commit
						3b8000ae18
					
				
					 1 changed file with 21 additions and 15 deletions
				
			
		
							
								
								
									
										36
									
								
								mm/vmalloc.c
									
									
									
									
									
								
							
							
						
						
									
										36
									
								
								mm/vmalloc.c
									
									
									
									
									
								
							| 
						 | 
					@ -2653,15 +2653,18 @@ static void __vunmap(const void *addr, int deallocate_pages)
 | 
				
			||||||
	vm_remove_mappings(area, deallocate_pages);
 | 
						vm_remove_mappings(area, deallocate_pages);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (deallocate_pages) {
 | 
						if (deallocate_pages) {
 | 
				
			||||||
		unsigned int page_order = vm_area_page_order(area);
 | 
							int i;
 | 
				
			||||||
		int i, step = 1U << page_order;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
		for (i = 0; i < area->nr_pages; i += step) {
 | 
							for (i = 0; i < area->nr_pages; i++) {
 | 
				
			||||||
			struct page *page = area->pages[i];
 | 
								struct page *page = area->pages[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			BUG_ON(!page);
 | 
								BUG_ON(!page);
 | 
				
			||||||
			mod_memcg_page_state(page, MEMCG_VMALLOC, -step);
 | 
								mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
 | 
				
			||||||
			__free_pages(page, page_order);
 | 
								/*
 | 
				
			||||||
 | 
								 * High-order allocs for huge vmallocs are split, so
 | 
				
			||||||
 | 
								 * can be freed as an array of order-0 allocations
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								__free_pages(page, 0);
 | 
				
			||||||
			cond_resched();
 | 
								cond_resched();
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
 | 
							atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
 | 
				
			||||||
| 
						 | 
					@ -2914,12 +2917,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 | 
				
			||||||
			if (nr != nr_pages_request)
 | 
								if (nr != nr_pages_request)
 | 
				
			||||||
				break;
 | 
									break;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	} else
 | 
						}
 | 
				
			||||||
		/*
 | 
					 | 
				
			||||||
		 * Compound pages required for remap_vmalloc_page if
 | 
					 | 
				
			||||||
		 * high-order pages.
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
		gfp |= __GFP_COMP;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* High-order pages or fallback path if "bulk" fails. */
 | 
						/* High-order pages or fallback path if "bulk" fails. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2933,6 +2931,15 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 | 
				
			||||||
			page = alloc_pages_node(nid, gfp, order);
 | 
								page = alloc_pages_node(nid, gfp, order);
 | 
				
			||||||
		if (unlikely(!page))
 | 
							if (unlikely(!page))
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * Higher order allocations must be able to be treated as
 | 
				
			||||||
 | 
						 * independent small pages by callers (as they can with
 | 
				
			||||||
 | 
							 * small-page vmallocs). Some drivers do their own refcounting
 | 
				
			||||||
 | 
							 * on vmalloc_to_page() pages, some use page->mapping,
 | 
				
			||||||
 | 
							 * page->lru, etc.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							if (order)
 | 
				
			||||||
 | 
								split_page(page, order);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * Careful, we allocate and map page-order pages, but
 | 
							 * Careful, we allocate and map page-order pages, but
 | 
				
			||||||
| 
						 | 
					@ -2992,11 +2999,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
 | 
						atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
 | 
				
			||||||
	if (gfp_mask & __GFP_ACCOUNT) {
 | 
						if (gfp_mask & __GFP_ACCOUNT) {
 | 
				
			||||||
		int i, step = 1U << page_order;
 | 
							int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		for (i = 0; i < area->nr_pages; i += step)
 | 
							for (i = 0; i < area->nr_pages; i++)
 | 
				
			||||||
			mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC,
 | 
								mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
 | 
				
			||||||
					     step);
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue