forked from mirrors/linux
		
	Apply memory policies to top two highest zones when highest zone is ZONE_MOVABLE
The NUMA layer only supports NUMA policies for the highest zone. When ZONE_MOVABLE is configured with kernelcore=, the highest zone becomes ZONE_MOVABLE. The result is that policies are only applied to allocations like anonymous pages and page cache allocated from ZONE_MOVABLE when the zone is used. This patch applies policies to the two highest zones when the highest zone is ZONE_MOVABLE. As ZONE_MOVABLE consists of pages from the highest "real" zone, it's always functionally equivalent. The patch has been tested on a variety of machines both NUMA and non-NUMA covering x86, x86_64 and ppc64. No abnormal results were seen in kernbench, tbench, dbench or hackbench. It passes regression tests from the numactl package with and without kernelcore= once numactl tests are patched to wait for vmstat counters to update. akpm: this is the nasty hack to fix NUMA mempolicies in the presence of ZONE_MOVABLE and kernelcore= in 2.6.23. Christoph says "For .24 either merge the mobility or get the other solution that Mel is working on. That solution would only use a single zonelist per node and filter on the fly. That may help performance and also help to make memory policies work better." Signed-off-by: Mel Gorman <mel@csn.ul.ie> Acked-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Tested-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Acked-by: Christoph Lameter <clameter@sgi.com> Cc: Andi Kleen <ak@suse.de> Cc: Paul Mundt <lethal@linux-sh.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									8e92f21ba3
								
							
						
					
					
						commit
						b377fd3982
					
				
					 4 changed files with 33 additions and 2 deletions
				
			
		| 
						 | 
				
			
			@ -166,7 +166,7 @@ extern enum zone_type policy_zone;
 | 
			
		|||
 | 
			
		||||
static inline void check_highest_zone(enum zone_type k)
 | 
			
		||||
{
 | 
			
		||||
	if (k > policy_zone)
 | 
			
		||||
	if (k > policy_zone && k != ZONE_MOVABLE)
 | 
			
		||||
		policy_zone = k;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -410,6 +410,24 @@ struct zonelist {
 | 
			
		|||
#endif
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_NUMA
 | 
			
		||||
/*
 | 
			
		||||
 * Only custom zonelists like MPOL_BIND need to be filtered as part of
 | 
			
		||||
 * policies. As described in the comment for struct zonelist_cache, these
 | 
			
		||||
 * zonelists will not have a zlcache so zlcache_ptr will not be set. Use
 | 
			
		||||
 * that to determine if the zonelist needs to be filtered or not.
 | 
			
		||||
 */
 | 
			
		||||
static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
 | 
			
		||||
{
 | 
			
		||||
	return !zonelist->zlcache_ptr;
 | 
			
		||||
}
 | 
			
		||||
#else
 | 
			
		||||
static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
 | 
			
		||||
{
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
#endif /* CONFIG_NUMA */
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
 | 
			
		||||
struct node_active_region {
 | 
			
		||||
	unsigned long start_pfn;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -149,7 +149,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
 | 
			
		|||
	   lower zones etc. Avoid empty zones because the memory allocator
 | 
			
		||||
	   doesn't like them. If you implement node hot removal you
 | 
			
		||||
	   have to fix that. */
 | 
			
		||||
	k = policy_zone;
 | 
			
		||||
	k = MAX_NR_ZONES - 1;
 | 
			
		||||
	while (1) {
 | 
			
		||||
		for_each_node_mask(nd, *nodes) { 
 | 
			
		||||
			struct zone *z = &NODE_DATA(nd)->node_zones[k];
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1157,6 +1157,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 | 
			
		|||
	nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
 | 
			
		||||
	int zlc_active = 0;		/* set if using zonelist_cache */
 | 
			
		||||
	int did_zlc_setup = 0;		/* just call zlc_setup() one time */
 | 
			
		||||
	enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */
 | 
			
		||||
 | 
			
		||||
zonelist_scan:
 | 
			
		||||
	/*
 | 
			
		||||
| 
						 | 
				
			
			@ -1166,6 +1167,18 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 | 
			
		|||
	z = zonelist->zones;
 | 
			
		||||
 | 
			
		||||
	do {
 | 
			
		||||
		/*
 | 
			
		||||
		 * In NUMA, this could be a policy zonelist which contains
 | 
			
		||||
		 * zones that may not be allowed by the current gfp_mask.
 | 
			
		||||
		 * Check the zone is allowed by the current flags
 | 
			
		||||
		 */
 | 
			
		||||
		if (unlikely(alloc_should_filter_zonelist(zonelist))) {
 | 
			
		||||
			if (highest_zoneidx == -1)
 | 
			
		||||
				highest_zoneidx = gfp_zone(gfp_mask);
 | 
			
		||||
			if (zone_idx(*z) > highest_zoneidx)
 | 
			
		||||
				continue;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (NUMA_BUILD && zlc_active &&
 | 
			
		||||
			!zlc_zone_worth_trying(zonelist, z, allowednodes))
 | 
			
		||||
				continue;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue