forked from mirrors/linux
		
	page_cgroup: reduce allocation overhead for page_cgroup array for CONFIG_SPARSEMEM
Currently we are allocating a single page_cgroup array per memory section
(stored in mem_section->page_cgroup) when CONFIG_SPARSEMEM is selected.  This
is a correct but memory-inefficient solution because the allocated memory
(unless we fall back to vmalloc) is not kmalloc friendly:
        - 32b - 16384 entries (20B per entry) fit into 327680B so the
          524288B slab cache is used
        - 32b with PAE - 131072 entries with 2621440B fit into the
          4194304B cache
        - 64b - 32768 entries (40B per entry) fit into the 2097152B cache
This is ~37% wasted space per memory section and it adds up across the whole
memory.  On an x86_64 machine it is something like 6MB per 1GB of RAM.
We can reduce the internal fragmentation by using alloc_pages_exact which
allocates PAGE_SIZE aligned blocks so we will get down to <4kB wasted
memory per section which is much better.
We still need a fallback to vmalloc because we have no guarantee that a
physically contiguous block of that size (order-10) will be available later
on during hotplug events.
[hannes@cmpxchg.org: do not define unused free_page_cgroup() without memory hotplug]
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
			
			
This commit is contained in:
		
							parent
							
								
									4be4489fea
								
							
						
					
					
						commit
						dde79e005a
					
				
					 1 changed files with 36 additions and 22 deletions
				
			
		|  | @ -130,7 +130,38 @@ struct page *lookup_cgroup_page(struct page_cgroup *pc) | ||||||
| 	return page; | 	return page; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* __alloc_bootmem...() is protected by !slab_available() */ | static void *__init_refok alloc_page_cgroup(size_t size, int nid) | ||||||
|  | { | ||||||
|  | 	void *addr = NULL; | ||||||
|  | 
 | ||||||
|  | 	addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN); | ||||||
|  | 	if (addr) | ||||||
|  | 		return addr; | ||||||
|  | 
 | ||||||
|  | 	if (node_state(nid, N_HIGH_MEMORY)) | ||||||
|  | 		addr = vmalloc_node(size, nid); | ||||||
|  | 	else | ||||||
|  | 		addr = vmalloc(size); | ||||||
|  | 
 | ||||||
|  | 	return addr; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #ifdef CONFIG_MEMORY_HOTPLUG | ||||||
|  | static void free_page_cgroup(void *addr) | ||||||
|  | { | ||||||
|  | 	if (is_vmalloc_addr(addr)) { | ||||||
|  | 		vfree(addr); | ||||||
|  | 	} else { | ||||||
|  | 		struct page *page = virt_to_page(addr); | ||||||
|  | 		if (!PageReserved(page)) { /* Is bootmem ? */ | ||||||
|  | 			size_t table_size = | ||||||
|  | 				sizeof(struct page_cgroup) * PAGES_PER_SECTION; | ||||||
|  | 			free_pages_exact(addr, table_size); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| static int __init_refok init_section_page_cgroup(unsigned long pfn) | static int __init_refok init_section_page_cgroup(unsigned long pfn) | ||||||
| { | { | ||||||
| 	struct page_cgroup *base, *pc; | 	struct page_cgroup *base, *pc; | ||||||
|  | @ -147,17 +178,8 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn) | ||||||
| 
 | 
 | ||||||
| 	nid = page_to_nid(pfn_to_page(pfn)); | 	nid = page_to_nid(pfn_to_page(pfn)); | ||||||
| 	table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; | 	table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; | ||||||
| 	VM_BUG_ON(!slab_is_available()); | 	base = alloc_page_cgroup(table_size, nid); | ||||||
| 	if (node_state(nid, N_HIGH_MEMORY)) { | 
 | ||||||
| 		base = kmalloc_node(table_size, |  | ||||||
| 				    GFP_KERNEL | __GFP_NOWARN, nid); |  | ||||||
| 		if (!base) |  | ||||||
| 			base = vmalloc_node(table_size, nid); |  | ||||||
| 	} else { |  | ||||||
| 		base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN); |  | ||||||
| 		if (!base) |  | ||||||
| 			base = vmalloc(table_size); |  | ||||||
| 	} |  | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * The value stored in section->page_cgroup is (base - pfn) | 	 * The value stored in section->page_cgroup is (base - pfn) | ||||||
| 	 * and it does not point to the memory block allocated above, | 	 * and it does not point to the memory block allocated above, | ||||||
|  | @ -189,16 +211,8 @@ void __free_page_cgroup(unsigned long pfn) | ||||||
| 	if (!ms || !ms->page_cgroup) | 	if (!ms || !ms->page_cgroup) | ||||||
| 		return; | 		return; | ||||||
| 	base = ms->page_cgroup + pfn; | 	base = ms->page_cgroup + pfn; | ||||||
| 	if (is_vmalloc_addr(base)) { | 	free_page_cgroup(base); | ||||||
| 		vfree(base); | 	ms->page_cgroup = NULL; | ||||||
| 		ms->page_cgroup = NULL; |  | ||||||
| 	} else { |  | ||||||
| 		struct page *page = virt_to_page(base); |  | ||||||
| 		if (!PageReserved(page)) { /* Is bootmem ? */ |  | ||||||
| 			kfree(base); |  | ||||||
| 			ms->page_cgroup = NULL; |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int __meminit online_page_cgroup(unsigned long start_pfn, | int __meminit online_page_cgroup(unsigned long start_pfn, | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Michal Hocko
						Michal Hocko