mm/vmalloc: hugepage vmalloc mappings

Support huge page vmalloc mappings. The config option HAVE_ARCH_HUGE_VMALLOC enables support on architectures that define HAVE_ARCH_HUGE_VMAP and support PMD-sized vmap mappings.

vmalloc will attempt to allocate PMD-sized pages when allocating PMD size or larger, and fall back to small pages if that is unsuccessful. Architectures must ensure that any arch-specific vmalloc allocations that require PAGE_SIZE mappings (e.g., module allocations vs. strict module rwx) use the VM_NO_HUGE_VMAP flag to inhibit larger mappings.

This can result in more internal fragmentation and memory overhead for a given allocation; a boot option, nohugevmalloc, is added to disable the behaviour.

[colin.king@canonical.com: fix read of uninitialized pointer area]
Link: https://lkml.kernel.org/r/20210318155955.18220-1-colin.king@canonical.com
Link: https://lkml.kernel.org/r/20210317062402.533919-14-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ding Tianhong <dingtianhong@huawei.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
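For orientation, a minimal caller-side sketch (the helper demo_vmalloc and its need_small_ptes parameter are hypothetical, not part of this patch): an allocation that must stay PAGE_SIZE-mapped passes VM_NO_HUGE_VMAP through __vmalloc_node_range(), while an ordinary caller can query is_vm_area_hugepages() on the result, as alloc_large_system_hash() does in the mm/page_alloc.c hunk below.

#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/vmalloc.h>

/*
 * Hypothetical helper: if the mapping's permissions will later be changed
 * page by page (module-style allocations), opt out of huge mappings with
 * VM_NO_HUGE_VMAP. Otherwise the allocation may or may not end up
 * hugepage-backed, depending on its size, the architecture's support and
 * the nohugevmalloc boot option.
 */
static void *demo_vmalloc(unsigned long size, bool need_small_ptes)
{
	unsigned long vm_flags = need_small_ptes ? VM_NO_HUGE_VMAP : 0;
	void *p;

	p = __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
				 GFP_KERNEL, PAGE_KERNEL, vm_flags,
				 NUMA_NO_NODE, __builtin_return_address(0));
	if (p)
		pr_info("demo_vmalloc: %lu bytes, hugepage backed: %d\n",
			size, is_vm_area_hugepages(p));
	return p;
}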
parent 5d87510de1
commit 121e6f3258

4 changed files with 210 additions and 49 deletions: arch/Kconfig (11 lines changed), include/linux/vmalloc.h, mm/page_alloc.c, mm/vmalloc.c (222 lines changed)
diff --git a/arch/Kconfig b/arch/Kconfig
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -829,6 +829,17 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 config HAVE_ARCH_HUGE_VMAP
 	bool
 
+#
+#  Archs that select this would be capable of PMD-sized vmaps (i.e.,
+#  arch_vmap_pmd_supported() returns true), and they must make no assumptions
+#  that vmalloc memory is mapped with PAGE_SIZE ptes. The VM_NO_HUGE_VMAP flag
+#  can be used to prohibit arch-specific allocations from using hugepages to
+#  help with this (e.g., modules may require it).
+#
+config HAVE_ARCH_HUGE_VMALLOC
+	depends on HAVE_ARCH_HUGE_VMAP
+	bool
+
 config ARCH_WANT_HUGE_PMD_SHARE
 	bool
 
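As a rough sketch of the architecture side (hypothetical, not taken from any real arch): an architecture would select HAVE_ARCH_HUGE_VMALLOC in its Kconfig entry on top of its existing HAVE_ARCH_HUGE_VMAP support, and the generic code consults its arch_vmap_pmd_supported() hook (the demo file path and return value below are assumptions) to decide whether PMD leaf mappings may be used for a given protection.

/* arch/demo/mm/mmu.c -- hypothetical architecture, sketch only */
#include <linux/mm.h>
#include <linux/vmalloc.h>

/*
 * Asked by the generic vmap/vmalloc code whether a PMD-sized leaf mapping
 * may be used for this protection. A real implementation checks MMU mode
 * and page table format; related hooks arch_vmap_pud_supported() and
 * arch_vmap_p4d_supported() exist for the larger levels.
 */
bool arch_vmap_pmd_supported(pgprot_t prot)
{
	/* assumption: the demo MMU can always install PMD leaf entries */
	return true;
}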
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -26,6 +26,7 @@ struct notifier_block;		/* in notifier.h */
 #define VM_KASAN		0x00000080      /* has allocated kasan shadow memory */
 #define VM_FLUSH_RESET_PERMS	0x00000100	/* reset direct map and flush TLB on unmap, can't be freed in atomic context */
 #define VM_MAP_PUT_PAGES	0x00000200	/* put pages and free array in vfree */
+#define VM_NO_HUGE_VMAP		0x00000400	/* force PAGE_SIZE pte mapping */
 
 /*
  * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC.
@@ -54,6 +55,9 @@ struct vm_struct {
 	unsigned long		size;
 	unsigned long		flags;
 	struct page		**pages;
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
+	unsigned int		page_order;
+#endif
 	unsigned int		nr_pages;
 	phys_addr_t		phys_addr;
 	const void		*caller;
@@ -188,6 +192,22 @@ void free_vm_area(struct vm_struct *area);
 extern struct vm_struct *remove_vm_area(const void *addr);
 extern struct vm_struct *find_vm_area(const void *addr);
 
+static inline bool is_vm_area_hugepages(const void *addr)
+{
+	/*
+	 * This may not 100% tell if the area is mapped with > PAGE_SIZE
+	 * page table entries, if for some reason the architecture indicates
+	 * larger sizes are available but decides not to use them, nothing
+	 * prevents that. This only indicates the size of the physical page
+	 * allocated in the vmalloc layer.
+	 */
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
+	return find_vm_area(addr)->page_order > 0;
+#else
+	return false;
+#endif
+}
+
 #ifdef CONFIG_MMU
 int vmap_range(unsigned long addr, unsigned long end,
 			phys_addr_t phys_addr, pgprot_t prot,
@@ -205,6 +225,7 @@ static inline void set_vm_flush_reset_perms(void *addr)
 	if (vm)
 		vm->flags |= VM_FLUSH_RESET_PERMS;
 }
+
 #else
 static inline int
 map_kernel_range_noflush(unsigned long start, unsigned long size,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -72,6 +72,7 @@
 #include <linux/padata.h>
 #include <linux/khugepaged.h>
 #include <linux/buffer_head.h>
+#include <linux/vmalloc.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -8222,6 +8223,7 @@ void *__init alloc_large_system_hash(const char *tablename,
 	void *table = NULL;
 	gfp_t gfp_flags;
 	bool virt;
+	bool huge;
 
 	/* allow the kernel cmdline to have a say */
 	if (!numentries) {
@@ -8289,6 +8291,7 @@ void *__init alloc_large_system_hash(const char *tablename,
 		} else if (get_order(size) >= MAX_ORDER || hashdist) {
 			table = __vmalloc(size, gfp_flags);
 			virt = true;
+			huge = is_vm_area_hugepages(table);
 		} else {
 			/*
 			 * If bucketsize is not a power-of-two, we may free
@@ -8305,7 +8308,7 @@ void *__init alloc_large_system_hash(const char *tablename,
 
 	pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
 		tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
-		virt ? "vmalloc" : "linear");
+		virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
 
 	if (_hash_shift)
 		*_hash_shift = log2qty;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -42,6 +42,19 @@
 #include "internal.h"
 #include "pgalloc-track.h"
 
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
+static bool __ro_after_init vmap_allow_huge = true;
+
+static int __init set_nohugevmalloc(char *str)
+{
+	vmap_allow_huge = false;
+	return 0;
+}
+early_param("nohugevmalloc", set_nohugevmalloc);
+#else /* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
+static const bool vmap_allow_huge = false;
+#endif	/* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
+
 bool is_vmalloc_addr(const void *x)
 {
 	unsigned long addr = (unsigned long)x;
@@ -483,6 +496,69 @@ static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr,
 	return 0;
 }
 
+static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end,
+		pgprot_t prot, struct page **pages)
+{
+	unsigned long start = addr;
+	pgd_t *pgd;
+	unsigned long next;
+	int err = 0;
+	int nr = 0;
+	pgtbl_mod_mask mask = 0;
+
+	BUG_ON(addr >= end);
+	pgd = pgd_offset_k(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_bad(*pgd))
+			mask |= PGTBL_PGD_MODIFIED;
+		err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
+		if (err)
+			return err;
+	} while (pgd++, addr = next, addr != end);
+
+	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
+		arch_sync_kernel_mappings(start, end);
+
+	return 0;
+}
+
+static int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
+		pgprot_t prot, struct page **pages, unsigned int page_shift)
+{
+	unsigned int i, nr = (end - addr) >> PAGE_SHIFT;
+
+	WARN_ON(page_shift < PAGE_SHIFT);
+
+	if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
+			page_shift == PAGE_SHIFT)
+		return vmap_small_pages_range_noflush(addr, end, prot, pages);
+
+	for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
+		int err;
+
+		err = vmap_range_noflush(addr, addr + (1UL << page_shift),
+					__pa(page_address(pages[i])), prot,
+					page_shift);
+		if (err)
+			return err;
+
+		addr += 1UL << page_shift;
+	}
+
+	return 0;
+}
+
+static int vmap_pages_range(unsigned long addr, unsigned long end,
+		pgprot_t prot, struct page **pages, unsigned int page_shift)
+{
+	int err;
+
+	err = vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
+	flush_cache_vmap(addr, end);
+	return err;
+}
+
 /**
  * map_kernel_range_noflush - map kernel VM area with the specified pages
  * @addr: start of the VM area to map
@@ -504,29 +580,7 @@ static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr,
 int map_kernel_range_noflush(unsigned long addr, unsigned long size,
 			     pgprot_t prot, struct page **pages)
 {
-	unsigned long start = addr;
-	unsigned long end = addr + size;
-	unsigned long next;
-	pgd_t *pgd;
-	int err = 0;
-	int nr = 0;
-	pgtbl_mod_mask mask = 0;
-
-	BUG_ON(addr >= end);
-	pgd = pgd_offset_k(addr);
-	do {
-		next = pgd_addr_end(addr, end);
-		if (pgd_bad(*pgd))
-			mask |= PGTBL_PGD_MODIFIED;
-		err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
-		if (err)
-			return err;
-	} while (pgd++, addr = next, addr != end);
-
-	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
-		arch_sync_kernel_mappings(start, end);
-
-	return 0;
+	return vmap_pages_range_noflush(addr, addr + size, prot, pages, PAGE_SHIFT);
 }
 
 int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot,
@@ -2112,6 +2166,24 @@ EXPORT_SYMBOL(vm_map_ram);
 
 static struct vm_struct *vmlist __initdata;
 
+static inline unsigned int vm_area_page_order(struct vm_struct *vm)
+{
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
+	return vm->page_order;
+#else
+	return 0;
+#endif
+}
+
+static inline void set_vm_area_page_order(struct vm_struct *vm, unsigned int order)
+{
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
+	vm->page_order = order;
+#else
+	BUG_ON(order != 0);
+#endif
+}
+
 /**
  * vm_area_add_early - add vmap area early during boot
  * @vm: vm_struct to add
@@ -2422,6 +2494,7 @@ static inline void set_area_direct_map(const struct vm_struct *area,
 {
 	int i;
 
+	/* HUGE_VMALLOC passes small pages to set_direct_map */
 	for (i = 0; i < area->nr_pages; i++)
 		if (page_address(area->pages[i]))
 			set_direct_map(area->pages[i]);
@@ -2431,6 +2504,7 @@ static inline void set_area_direct_map(const struct vm_struct *area,
 static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
 {
 	unsigned long start = ULONG_MAX, end = 0;
+	unsigned int page_order = vm_area_page_order(area);
 	int flush_reset = area->flags & VM_FLUSH_RESET_PERMS;
 	int flush_dmap = 0;
 	int i;
@@ -2455,11 +2529,14 @@ static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
 	 * map. Find the start and end range of the direct mappings to make sure
 	 * the vm_unmap_aliases() flush includes the direct map.
 	 */
-	for (i = 0; i < area->nr_pages; i++) {
+	for (i = 0; i < area->nr_pages; i += 1U << page_order) {
 		unsigned long addr = (unsigned long)page_address(area->pages[i]);
 		if (addr) {
+			unsigned long page_size;
+
+			page_size = PAGE_SIZE << page_order;
 			start = min(addr, start);
-			end = max(addr + PAGE_SIZE, end);
+			end = max(addr + page_size, end);
 			flush_dmap = 1;
 		}
 	}
@@ -2500,13 +2577,14 @@ static void __vunmap(const void *addr, int deallocate_pages)
 	vm_remove_mappings(area, deallocate_pages);
 
 	if (deallocate_pages) {
+		unsigned int page_order = vm_area_page_order(area);
 		int i;
 
-		for (i = 0; i < area->nr_pages; i++) {
+		for (i = 0; i < area->nr_pages; i += 1U << page_order) {
 			struct page *page = area->pages[i];
 
 			BUG_ON(!page);
-			__free_pages(page, 0);
+			__free_pages(page, page_order);
 		}
 		atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
 
@@ -2697,15 +2775,19 @@ EXPORT_SYMBOL_GPL(vmap_pfn);
 #endif /* CONFIG_VMAP_PFN */
 
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
-				 pgprot_t prot, int node)
+				 pgprot_t prot, unsigned int page_shift,
+				 int node)
 {
 	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
-	unsigned int nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
+	unsigned long addr = (unsigned long)area->addr;
+	unsigned long size = get_vm_area_size(area);
 	unsigned long array_size;
-	unsigned int i;
+	unsigned int nr_small_pages = size >> PAGE_SHIFT;
+	unsigned int page_order;
 	struct page **pages;
+	unsigned int i;
 
-	array_size = (unsigned long)nr_pages * sizeof(struct page *);
+	array_size = (unsigned long)nr_small_pages * sizeof(struct page *);
 	gfp_mask |= __GFP_NOWARN;
 	if (!(gfp_mask & (GFP_DMA | GFP_DMA32)))
 		gfp_mask |= __GFP_HIGHMEM;
@@ -2724,30 +2806,38 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	}
 
 	area->pages = pages;
-	area->nr_pages = nr_pages;
+	area->nr_pages = nr_small_pages;
+	set_vm_area_page_order(area, page_shift - PAGE_SHIFT);
 
-	for (i = 0; i < area->nr_pages; i++) {
+	page_order = vm_area_page_order(area);
+
+	/*
+	 * Careful, we allocate and map page_order pages, but tracking is done
+	 * per PAGE_SIZE page so as to keep the vm_struct APIs independent of
+	 * the physical/mapped size.
+	 */
+	for (i = 0; i < area->nr_pages; i += 1U << page_order) {
 		struct page *page;
+		int p;
 
-		if (node == NUMA_NO_NODE)
-			page = alloc_page(gfp_mask);
-		else
-			page = alloc_pages_node(node, gfp_mask, 0);
-
+		/* Compound pages required for remap_vmalloc_page */
+		page = alloc_pages_node(node, gfp_mask | __GFP_COMP, page_order);
 		if (unlikely(!page)) {
 			/* Successfully allocated i pages, free them in __vfree() */
 			area->nr_pages = i;
 			atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
 			goto fail;
 		}
-		area->pages[i] = page;
+
+		for (p = 0; p < (1U << page_order); p++)
+			area->pages[i + p] = page + p;
+
 		if (gfpflags_allow_blocking(gfp_mask))
 			cond_resched();
 	}
 	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
 
-	if (map_kernel_range((unsigned long)area->addr, get_vm_area_size(area),
-			prot, pages) < 0)
+	if (vmap_pages_range(addr, addr + size, prot, pages, page_shift) < 0)
 		goto fail;
 
 	return area->addr;
@@ -2755,7 +2845,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 fail:
 	warn_alloc(gfp_mask, NULL,
 			  "vmalloc: allocation failure, allocated %ld of %ld bytes",
-			  (area->nr_pages*PAGE_SIZE), area->size);
+			  (area->nr_pages*PAGE_SIZE), size);
 	__vfree(area->addr);
 	return NULL;
 }
@@ -2786,19 +2876,45 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	struct vm_struct *area;
 	void *addr;
 	unsigned long real_size = size;
+	unsigned long real_align = align;
+	unsigned int shift = PAGE_SHIFT;
 
-	size = PAGE_ALIGN(size);
-	if (!size || (size >> PAGE_SHIFT) > totalram_pages())
+	if (!size || (size >> PAGE_SHIFT) > totalram_pages()) {
+		area = NULL;
 		goto fail;
+	}
 
-	area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED |
+	if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP) &&
+			arch_vmap_pmd_supported(prot)) {
+		unsigned long size_per_node;
+
+		/*
+		 * Try huge pages. Only try for PAGE_KERNEL allocations,
+		 * others like modules don't yet expect huge pages in
+		 * their allocations due to apply_to_page_range not
+		 * supporting them.
+		 */
+
+		size_per_node = size;
+		if (node == NUMA_NO_NODE)
+			size_per_node /= num_online_nodes();
+		if (size_per_node >= PMD_SIZE) {
+			shift = PMD_SHIFT;
+			align = max(real_align, 1UL << shift);
+			size = ALIGN(real_size, 1UL << shift);
+		}
+	}
+
+again:
+	size = PAGE_ALIGN(size);
+	area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
 				vm_flags, start, end, node, gfp_mask, caller);
 	if (!area)
 		goto fail;
 
-	addr = __vmalloc_area_node(area, gfp_mask, prot, node);
+	addr = __vmalloc_area_node(area, gfp_mask, prot, shift, node);
 	if (!addr)
-		return NULL;
+		goto fail;
 
 	/*
 	 * In this function, newly allocated vm_struct has VM_UNINITIALIZED
@@ -2812,8 +2928,18 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	return addr;
 
 fail:
-	warn_alloc(gfp_mask, NULL,
+	if (shift > PAGE_SHIFT) {
+		shift = PAGE_SHIFT;
+		align = real_align;
+		size = real_size;
+		goto again;
+	}
+
+	if (!area) {
+		/* Warn for area allocation, page allocations already warn */
+		warn_alloc(gfp_mask, NULL,
 			  "vmalloc: allocation failure: %lu bytes", real_size);
+	}
 	return NULL;
 }
 