mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	mm/page_alloc: Introduce free_area_init_core_hotplug
Currently, whenever a new node is created/re-used from the memhotplug path, we call free_area_init_node()->free_area_init_core(). But there is some code that we do not really need to run when we are coming from such path. free_area_init_core() performs the following actions: 1) Initializes pgdat internals, such as spinlock, waitqueues and more. 2) Account # nr_all_pages and # nr_kernel_pages. These values are used later on when creating hash tables. 3) Account number of managed_pages per zone, substracting dma_reserved and memmap pages. 4) Initializes some fields of the zone structure data 5) Calls init_currently_empty_zone to initialize all the freelists 6) Calls memmap_init to initialize all pages belonging to certain zone When called from memhotplug path, free_area_init_core() only performs actions #1 and #4. Action #2 is pointless as the zones do not have any pages since either the node was freed, or we are re-using it, eitherway all zones belonging to this node should have 0 pages. For the same reason, action #3 results always in manages_pages being 0. Action #5 and #6 are performed later on when onlining the pages: online_pages()->move_pfn_range_to_zone()->init_currently_empty_zone() online_pages()->move_pfn_range_to_zone()->memmap_init_zone() This patch does two things: First, moves the node/zone initializtion to their own function, so it allows us to create a small version of free_area_init_core, where we only perform: 1) Initialization of pgdat internals, such as spinlock, waitqueues and more 4) Initialization of some fields of the zone structure data These two functions are: pgdat_init_internals() and zone_init_internals(). The second thing this patch does, is to introduce free_area_init_core_hotplug(), the memhotplug version of free_area_init_core(): Currently, we call free_area_init_node() from the memhotplug path. In there, we set some pgdat's fields, and call calculate_node_totalpages(). calculate_node_totalpages() calculates the # of pages the node has. Since the node is either new, or we are re-using it, the zones belonging to this node should not have any pages, so there is no point to calculate this now. Actually, we re-set these values to 0 later on with the calls to: reset_node_managed_pages() reset_node_present_pages() The # of pages per node and the # of pages per zone will be calculated when onlining the pages: online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_zone_range() online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_pgdat_range() Also, since free_area_init_core/free_area_init_node will now only get called during early init, let us replace __paginginit with __init, so their code gets freed up. [osalvador@techadventures.net: fix section usage] Link: http://lkml.kernel.org/r/20180731101752.GA473@techadventures.net [osalvador@suse.de: v6] Link: http://lkml.kernel.org/r/20180801122348.21588-6-osalvador@techadventures.net Link: http://lkml.kernel.org/r/20180730101757.28058-5-osalvador@techadventures.net Signed-off-by: Oscar Salvador <osalvador@suse.de> Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com> Acked-by: Michal Hocko <mhocko@suse.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com> Cc: Aaron Lu <aaron.lu@intel.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: David Hildenbrand <david@redhat.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Mel Gorman <mgorman@techsingularity.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									0188dc98ad
								
							
						
					
					
						commit
						03e85f9d5f
					
				
					 4 changed files with 61 additions and 36 deletions
				
			
		| 
						 | 
				
			
			@ -319,6 +319,7 @@ static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
 | 
			
		|||
static inline void remove_memory(int nid, u64 start, u64 size) {}
 | 
			
		||||
#endif /* CONFIG_MEMORY_HOTREMOVE */
 | 
			
		||||
 | 
			
		||||
extern void __ref free_area_init_core_hotplug(int nid);
 | 
			
		||||
extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
 | 
			
		||||
		void *arg, int (*func)(struct memory_block *, void *));
 | 
			
		||||
extern int add_memory(int nid, u64 start, u64 size);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2015,7 +2015,7 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
 | 
			
		|||
 | 
			
		||||
extern void __init pagecache_init(void);
 | 
			
		||||
extern void free_area_init(unsigned long * zones_size);
 | 
			
		||||
extern void free_area_init_node(int nid, unsigned long * zones_size,
 | 
			
		||||
extern void __init free_area_init_node(int nid, unsigned long * zones_size,
 | 
			
		||||
		unsigned long zone_start_pfn, unsigned long *zholes_size);
 | 
			
		||||
extern void free_initmem(void);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -982,8 +982,6 @@ static void reset_node_present_pages(pg_data_t *pgdat)
 | 
			
		|||
static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 | 
			
		||||
{
 | 
			
		||||
	struct pglist_data *pgdat;
 | 
			
		||||
	unsigned long zones_size[MAX_NR_ZONES] = {0};
 | 
			
		||||
	unsigned long zholes_size[MAX_NR_ZONES] = {0};
 | 
			
		||||
	unsigned long start_pfn = PFN_DOWN(start);
 | 
			
		||||
 | 
			
		||||
	pgdat = NODE_DATA(nid);
 | 
			
		||||
| 
						 | 
				
			
			@ -1006,8 +1004,11 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 | 
			
		|||
 | 
			
		||||
	/* we can use NODE_DATA(nid) from here */
 | 
			
		||||
 | 
			
		||||
	pgdat->node_id = nid;
 | 
			
		||||
	pgdat->node_start_pfn = start_pfn;
 | 
			
		||||
 | 
			
		||||
	/* init node's zones as empty zones, we don't have any present pages.*/
 | 
			
		||||
	free_area_init_node(nid, zones_size, start_pfn, zholes_size);
 | 
			
		||||
	free_area_init_core_hotplug(nid);
 | 
			
		||||
	pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
| 
						 | 
				
			
			@ -1016,19 +1017,12 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 | 
			
		|||
	 */
 | 
			
		||||
	build_all_zonelists(pgdat);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * zone->managed_pages is set to an approximate value in
 | 
			
		||||
	 * free_area_init_core(), which will cause
 | 
			
		||||
	 * /sys/device/system/node/nodeX/meminfo has wrong data.
 | 
			
		||||
	 * So reset it to 0 before any memory is onlined.
 | 
			
		||||
	 */
 | 
			
		||||
	reset_node_managed_pages(pgdat);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * When memory is hot-added, all the memory is in offline state. So
 | 
			
		||||
	 * clear all zones' present_pages because they will be updated in
 | 
			
		||||
	 * online_pages() and offline_pages().
 | 
			
		||||
	 */
 | 
			
		||||
	reset_node_managed_pages(pgdat);
 | 
			
		||||
	reset_node_present_pages(pgdat);
 | 
			
		||||
 | 
			
		||||
	return pgdat;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -6140,7 +6140,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
 | 
			
		|||
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 | 
			
		||||
 | 
			
		||||
/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
 | 
			
		||||
void __meminit set_pageblock_order(void)
 | 
			
		||||
void __init set_pageblock_order(void)
 | 
			
		||||
{
 | 
			
		||||
	unsigned int order;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -6168,13 +6168,13 @@ void __meminit set_pageblock_order(void)
 | 
			
		|||
 * include/linux/pageblock-flags.h for the values of pageblock_order based on
 | 
			
		||||
 * the kernel config
 | 
			
		||||
 */
 | 
			
		||||
void __meminit set_pageblock_order(void)
 | 
			
		||||
void __init set_pageblock_order(void)
 | 
			
		||||
{
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 | 
			
		||||
 | 
			
		||||
static unsigned long __meminit calc_memmap_size(unsigned long spanned_pages,
 | 
			
		||||
static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
 | 
			
		||||
						unsigned long present_pages)
 | 
			
		||||
{
 | 
			
		||||
	unsigned long pages = spanned_pages;
 | 
			
		||||
| 
						 | 
				
			
			@ -6225,19 +6225,8 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat)
 | 
			
		|||
static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Set up the zone data structures:
 | 
			
		||||
 *   - mark all pages reserved
 | 
			
		||||
 *   - mark all memory queues empty
 | 
			
		||||
 *   - clear the memory bitmaps
 | 
			
		||||
 *
 | 
			
		||||
 * NOTE: pgdat should get zeroed by caller.
 | 
			
		||||
 */
 | 
			
		||||
static void __meminit free_area_init_core(struct pglist_data *pgdat)
 | 
			
		||||
static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 | 
			
		||||
{
 | 
			
		||||
	enum zone_type j;
 | 
			
		||||
	int nid = pgdat->node_id;
 | 
			
		||||
 | 
			
		||||
	pgdat_resize_init(pgdat);
 | 
			
		||||
 | 
			
		||||
	pgdat_init_numabalancing(pgdat);
 | 
			
		||||
| 
						 | 
				
			
			@ -6250,7 +6239,54 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
 | 
			
		|||
	pgdat_page_ext_init(pgdat);
 | 
			
		||||
	spin_lock_init(&pgdat->lru_lock);
 | 
			
		||||
	lruvec_init(node_lruvec(pgdat));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
 | 
			
		||||
							unsigned long remaining_pages)
 | 
			
		||||
{
 | 
			
		||||
	zone->managed_pages = remaining_pages;
 | 
			
		||||
	zone_set_nid(zone, nid);
 | 
			
		||||
	zone->name = zone_names[idx];
 | 
			
		||||
	zone->zone_pgdat = NODE_DATA(nid);
 | 
			
		||||
	spin_lock_init(&zone->lock);
 | 
			
		||||
	zone_seqlock_init(zone);
 | 
			
		||||
	zone_pcp_init(zone);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Set up the zone data structures
 | 
			
		||||
 * - init pgdat internals
 | 
			
		||||
 * - init all zones belonging to this node
 | 
			
		||||
 *
 | 
			
		||||
 * NOTE: this function is only called during memory hotplug
 | 
			
		||||
 */
 | 
			
		||||
#ifdef CONFIG_MEMORY_HOTPLUG
 | 
			
		||||
void __ref free_area_init_core_hotplug(int nid)
 | 
			
		||||
{
 | 
			
		||||
	enum zone_type z;
 | 
			
		||||
	pg_data_t *pgdat = NODE_DATA(nid);
 | 
			
		||||
 | 
			
		||||
	pgdat_init_internals(pgdat);
 | 
			
		||||
	for (z = 0; z < MAX_NR_ZONES; z++)
 | 
			
		||||
		zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Set up the zone data structures:
 | 
			
		||||
 *   - mark all pages reserved
 | 
			
		||||
 *   - mark all memory queues empty
 | 
			
		||||
 *   - clear the memory bitmaps
 | 
			
		||||
 *
 | 
			
		||||
 * NOTE: pgdat should get zeroed by caller.
 | 
			
		||||
 * NOTE: this function is only called during early init.
 | 
			
		||||
 */
 | 
			
		||||
static void __init free_area_init_core(struct pglist_data *pgdat)
 | 
			
		||||
{
 | 
			
		||||
	enum zone_type j;
 | 
			
		||||
	int nid = pgdat->node_id;
 | 
			
		||||
 | 
			
		||||
	pgdat_init_internals(pgdat);
 | 
			
		||||
	pgdat->per_cpu_nodestats = &boot_nodestats;
 | 
			
		||||
 | 
			
		||||
	for (j = 0; j < MAX_NR_ZONES; j++) {
 | 
			
		||||
| 
						 | 
				
			
			@ -6298,13 +6334,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
 | 
			
		|||
		 * when the bootmem allocator frees pages into the buddy system.
 | 
			
		||||
		 * And all highmem pages will be managed by the buddy system.
 | 
			
		||||
		 */
 | 
			
		||||
		zone->managed_pages = freesize;
 | 
			
		||||
		zone_set_nid(zone, nid);
 | 
			
		||||
		zone->name = zone_names[j];
 | 
			
		||||
		zone->zone_pgdat = pgdat;
 | 
			
		||||
		spin_lock_init(&zone->lock);
 | 
			
		||||
		zone_seqlock_init(zone);
 | 
			
		||||
		zone_pcp_init(zone);
 | 
			
		||||
		zone_init_internals(zone, j, nid, freesize);
 | 
			
		||||
 | 
			
		||||
		if (!size)
 | 
			
		||||
			continue;
 | 
			
		||||
| 
						 | 
				
			
			@ -6379,7 +6409,7 @@ static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
 | 
			
		|||
static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
void __meminit free_area_init_node(int nid, unsigned long *zones_size,
 | 
			
		||||
void __init free_area_init_node(int nid, unsigned long *zones_size,
 | 
			
		||||
				   unsigned long node_start_pfn,
 | 
			
		||||
				   unsigned long *zholes_size)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -6418,7 +6448,7 @@ void __meminit free_area_init_node(int nid, unsigned long *zones_size,
 | 
			
		|||
 * may be accessed (for example page_to_pfn() on some configuration accesses
 | 
			
		||||
 * flags). We must explicitly zero those struct pages.
 | 
			
		||||
 */
 | 
			
		||||
void __meminit zero_resv_unavail(void)
 | 
			
		||||
void __init zero_resv_unavail(void)
 | 
			
		||||
{
 | 
			
		||||
	phys_addr_t start, end;
 | 
			
		||||
	unsigned long pfn;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue