mm, vmscan: only allocate and reclaim from zones with pages managed by the buddy allocator

Firmware Assisted Dump (FA_DUMP) on ppc64 reserves substantial amounts
of memory when booting a secondary kernel.  Srikar Dronamraju reported
that multiple nodes may have no memory managed by the buddy allocator
but still return true for populated_zone().

Commit 1d82de618d ("mm, vmscan: make kswapd reclaim in terms of
nodes") was reported to cause kswapd to spin at 100% CPU usage when
fadump was enabled.  The old code happened to cope with a populated
node that has zero free pages by coincidence, but the current code
tries to reclaim from populated zones without realising that this is
impossible.

We cannot simply convert populated_zone(), as many existing users
really do need to check present_pages.  This patch introduces a
managed_zone() helper and uses it in the few cases where it is
critical that the check is made for managed pages -- zonelist
construction and page reclaim.
Link: http://lkml.kernel.org/r/20160831195104.GB8119@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Reported-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Tested-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
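
An illustrative sketch of the condition the new helper distinguishes. This
is not part of the patch; it only assumes the struct zone fields and the
helpers declared in include/linux/mmzone.h, and the function name is
invented for the example:

	/*
	 * With fadump, the secondary kernel reserves a zone's memory at
	 * boot: present_pages stays non-zero, but none of those pages are
	 * handed to the buddy allocator, so managed_pages is 0.
	 */
	static bool zone_worth_reclaiming(struct zone *zone)
	{
		/*
		 * A fully reserved zone still satisfies populated_zone(),
		 * which is why keying reclaim off it made kswapd spin:
		 * populated_zone(zone) && !managed_zone(zone) is possible.
		 * Only buddy-managed pages can actually be reclaimed.
		 */
		return managed_zone(zone);
	}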
			
			
This commit is contained in:
commit 6aa303defb (parent e6173ba42b)
3 changed files with 27 additions and 15 deletions
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -828,9 +828,21 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
  */
 #define zone_idx(zone)		((zone) - (zone)->zone_pgdat->node_zones)
 
-static inline int populated_zone(struct zone *zone)
+/*
+ * Returns true if a zone has pages managed by the buddy allocator.
+ * All the reclaim decisions have to use this function rather than
+ * populated_zone(). If the whole zone is reserved then we can easily
+ * end up with populated_zone() && !managed_zone().
+ */
+static inline bool managed_zone(struct zone *zone)
 {
-	return (!!zone->present_pages);
+	return zone->managed_pages;
+}
+
+/* Returns true if a zone has memory */
+static inline bool populated_zone(struct zone *zone)
+{
+	return zone->present_pages;
 }
 
 extern int movable_zone;
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4360,7 +4360,7 @@ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
 	do {
 		zone_type--;
 		zone = pgdat->node_zones + zone_type;
-		if (populated_zone(zone)) {
+		if (managed_zone(zone)) {
 			zoneref_set_zone(zone,
 				&zonelist->_zonerefs[nr_zones++]);
 			check_highest_zone(zone_type);
@@ -4598,7 +4598,7 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
 		for (j = 0; j < nr_nodes; j++) {
 			node = node_order[j];
 			z = &NODE_DATA(node)->node_zones[zone_type];
-			if (populated_zone(z)) {
+			if (managed_zone(z)) {
 				zoneref_set_zone(z,
 					&zonelist->_zonerefs[pos++]);
 				check_highest_zone(zone_type);
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1665,7 +1665,7 @@ static bool inactive_reclaimable_pages(struct lruvec *lruvec,
 
 	for (zid = sc->reclaim_idx; zid >= 0; zid--) {
 		zone = &pgdat->node_zones[zid];
-		if (!populated_zone(zone))
+		if (!managed_zone(zone))
 			continue;
 
 		if (zone_page_state_snapshot(zone, NR_ZONE_LRU_BASE +
@@ -2036,7 +2036,7 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
 		struct zone *zone = &pgdat->node_zones[zid];
 		unsigned long inactive_zone, active_zone;
 
-		if (!populated_zone(zone))
+		if (!managed_zone(zone))
 			continue;
 
 		inactive_zone = zone_page_state(zone,
@@ -2171,7 +2171,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 
 		for (z = 0; z < MAX_NR_ZONES; z++) {
 			struct zone *zone = &pgdat->node_zones[z];
-			if (!populated_zone(zone))
+			if (!managed_zone(zone))
 				continue;
 
 			total_high_wmark += high_wmark_pages(zone);
@@ -2510,7 +2510,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 	/* If compaction would go ahead or the allocation would succeed, stop */
 	for (z = 0; z <= sc->reclaim_idx; z++) {
 		struct zone *zone = &pgdat->node_zones[z];
-		if (!populated_zone(zone))
+		if (!managed_zone(zone))
 			continue;
 
 		switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) {
@@ -2840,7 +2840,7 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
 
 	for (i = 0; i <= ZONE_NORMAL; i++) {
 		zone = &pgdat->node_zones[i];
-		if (!populated_zone(zone) ||
+		if (!managed_zone(zone) ||
 		    pgdat_reclaimable_pages(pgdat) == 0)
 			continue;
 
@@ -3141,7 +3141,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 	for (i = 0; i <= classzone_idx; i++) {
 		struct zone *zone = pgdat->node_zones + i;
 
-		if (!populated_zone(zone))
+		if (!managed_zone(zone))
 			continue;
 
 		if (!zone_balanced(zone, order, classzone_idx))
@@ -3169,7 +3169,7 @@ static bool kswapd_shrink_node(pg_data_t *pgdat,
 	sc->nr_to_reclaim = 0;
 	for (z = 0; z <= sc->reclaim_idx; z++) {
 		zone = pgdat->node_zones + z;
-		if (!populated_zone(zone))
+		if (!managed_zone(zone))
 			continue;
 
 		sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX);
@@ -3242,7 +3242,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 		if (buffer_heads_over_limit) {
 			for (i = MAX_NR_ZONES - 1; i >= 0; i--) {
 				zone = pgdat->node_zones + i;
-				if (!populated_zone(zone))
+				if (!managed_zone(zone))
 					continue;
 
 				sc.reclaim_idx = i;
@@ -3262,7 +3262,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 		 */
 		for (i = classzone_idx; i >= 0; i--) {
 			zone = pgdat->node_zones + i;
-			if (!populated_zone(zone))
+			if (!managed_zone(zone))
 				continue;
 
 			if (zone_balanced(zone, sc.order, classzone_idx))
@@ -3508,7 +3508,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
 	pg_data_t *pgdat;
 	int z;
 
-	if (!populated_zone(zone))
+	if (!managed_zone(zone))
 		return;
 
 	if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
@@ -3522,7 +3522,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
 	/* Only wake kswapd if all zones are unbalanced */
 	for (z = 0; z <= classzone_idx; z++) {
 		zone = pgdat->node_zones + z;
-		if (!populated_zone(zone))
+		if (!managed_zone(zone))
 			continue;
 
 		if (zone_balanced(zone, order, classzone_idx))