	mm/mempolicy.c: merge alloc_hugepage_vma to alloc_pages_vma
The previous commit ("mm/thp: Allocate transparent hugepages on local
node") introduced alloc_hugepage_vma() to mm/mempolicy.c to perform a
special policy for THP allocations.  The function has the same interface
as alloc_pages_vma(), shares a lot of boilerplate code and a long
comment.
This patch merges the hugepage special case into alloc_pages_vma.  The
extra if condition should be a cheap enough price to pay.  We also prevent
a (however unlikely) race with a parallel mems_allowed update, which could
make hugepage allocation restart only within the fallback call to
alloc_pages_vma() and not reconsider the special rule in
alloc_hugepage_vma().
Also, by making sure mpol_cond_put(pol) is always called before the actual
allocation attempt, we can use a single exit path within the function.
Also update the comment for missing node parameter and obsolete reference
to mm_sem.
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
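[Editorial illustration, not part of the patch] Below is a minimal user-space sketch of the merged control flow described above. The names alloc_pages_vma_model, policy_allows_node, interleave_node, alloc_on_node and alloc_with_fallback are made-up stand-ins for the kernel's mempolicy, cpuset and allocator helpers; only the shape of the logic mirrors the new alloc_pages_vma() in the diff below, i.e. one retry_cpuset loop and one shared exit label covering the THP local-node shortcut, the interleave case and the regular fallback.

#include <stdbool.h>
#include <stdio.h>

struct page { int nid; };
enum { MPOL_DEFAULT, MPOL_INTERLEAVE };
struct mempolicy { int mode; };

static struct page pg;

/* Trivial stubs standing in for the kernel's mempolicy/cpuset/allocator
 * helpers; they exist only so the sketch compiles and runs. */
static struct mempolicy *get_vma_policy(void)
{
	static struct mempolicy pol = { MPOL_DEFAULT };
	return &pol;
}
static void mpol_cond_put(struct mempolicy *pol) { (void)pol; }
static unsigned int read_mems_allowed_begin(void) { return 0; }
static bool read_mems_allowed_retry(unsigned int cookie) { (void)cookie; return false; }
static bool policy_allows_node(struct mempolicy *pol, int node) { (void)pol; (void)node; return true; }
static int interleave_node(struct mempolicy *pol) { (void)pol; return 0; }
static struct page *alloc_on_node(int node) { pg.nid = node; return &pg; }
static struct page *alloc_with_fallback(int node) { pg.nid = node; return &pg; }

/* Shape of the merged alloc_pages_vma(): one retry loop, mpol_cond_put()
 * always before the actual allocation, one shared exit label. */
static struct page *alloc_pages_vma_model(int node, bool hugepage)
{
	struct mempolicy *pol;
	struct page *page;
	unsigned int cookie;

retry_cpuset:
	pol = get_vma_policy();
	cookie = read_mems_allowed_begin();

	if (hugepage && pol->mode != MPOL_INTERLEAVE &&
	    policy_allows_node(pol, node)) {
		/* THP shortcut: try only the preferred node, no fallback. */
		mpol_cond_put(pol);
		page = alloc_on_node(node);
		goto out;
	}

	if (pol->mode == MPOL_INTERLEAVE) {
		int nid = interleave_node(pol);

		mpol_cond_put(pol);
		page = alloc_on_node(nid);
		goto out;
	}

	mpol_cond_put(pol);
	page = alloc_with_fallback(node);
out:
	/* The single cpuset retry check now covers every path above. */
	if (!page && read_mems_allowed_retry(cookie))
		goto retry_cpuset;
	return page;
}

int main(void)
{
	struct page *page = alloc_pages_vma_model(0, true);

	printf("allocated on node %d\n", page ? page->nid : -1);
	return 0;
}

Because mpol_cond_put() happens before every allocation attempt and every branch funnels through the same out: label, a single read_mems_allowed_retry() check is enough: a concurrent mems_allowed update restarts the whole policy decision, including the hugepage special rule.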
			
			
parent 077fcf116c
commit be97a41b29

2 changed files with 39 additions and 91 deletions
12 include/linux/gfp.h

@@ -334,22 +334,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node);
-extern struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma,
-				       unsigned long addr, int order);
+			int node, bool hugepage);
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order)	\
+	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node)	\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
 	alloc_pages(gfp_mask, order)
 #define alloc_hugepage_vma(gfp_mask, vma, addr, order)	\
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)			\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
 
 extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
 extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
118 mm/mempolicy.c
@@ -1988,120 +1988,68 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  *	@order:Order of the GFP allocation.
  * 	@vma:  Pointer to VMA or NULL if not available.
  *	@addr: Virtual Address of the allocation. Must be inside the VMA.
+ *	@node: Which node to prefer for allocation (modulo policy).
+ *	@hugepage: for hugepages try only the preferred node if possible
  *
  * 	This function allocates a page from the kernel page pool and applies
  *	a NUMA policy associated with the VMA or the current process.
  *	When VMA is not NULL caller must hold down_read on the mmap_sem of the
  *	mm_struct of the VMA to prevent it from going away. Should be used for
- *	all allocations for pages that will be mapped into
- * 	user space. Returns NULL when no page can be allocated.
- *
- *	Should be called with the mm_sem of the vma hold.
+ *	all allocations for pages that will be mapped into user space. Returns
+ *	NULL when no page can be allocated.
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node)
+		unsigned long addr, int node, bool hugepage)
 {
 	struct mempolicy *pol;
 	struct page *page;
 	unsigned int cpuset_mems_cookie;
+	struct zonelist *zl;
+	nodemask_t *nmask;
 
 retry_cpuset:
 	pol = get_vma_policy(vma, addr);
 	cpuset_mems_cookie = read_mems_allowed_begin();
 
-	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
+	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage &&
+					pol->mode != MPOL_INTERLEAVE)) {
+		/*
+		 * For hugepage allocation and non-interleave policy which
+		 * allows the current node, we only try to allocate from the
+		 * current node and don't fall back to other nodes, as the
+		 * cost of remote accesses would likely offset THP benefits.
+		 *
+		 * If the policy is interleave, or does not allow the current
+		 * node in its nodemask, we allocate the standard way.
+		 */
+		nmask = policy_nodemask(gfp, pol);
+		if (!nmask || node_isset(node, *nmask)) {
+			mpol_cond_put(pol);
+			page = alloc_pages_exact_node(node, gfp, order);
+			goto out;
+		}
+	}
+
+	if (pol->mode == MPOL_INTERLEAVE) {
 		unsigned nid;
 
 		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
 		mpol_cond_put(pol);
 		page = alloc_page_interleave(gfp, order, nid);
-		if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
-			goto retry_cpuset;
-
-		return page;
+		goto out;
 	}
-	page = __alloc_pages_nodemask(gfp, order,
-				      policy_zonelist(gfp, pol, node),
-				      policy_nodemask(gfp, pol));
+
+	nmask = policy_nodemask(gfp, pol);
+	zl = policy_zonelist(gfp, pol, node);
 	mpol_cond_put(pol);
+	page = __alloc_pages_nodemask(gfp, order, zl, nmask);
+out:
 	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
 		goto retry_cpuset;
 	return page;
 }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-/**
- * alloc_hugepage_vma: Allocate a hugepage for a VMA
- * @gfp:
- *   %GFP_USER	  user allocation.
- *   %GFP_KERNEL  kernel allocations,
- *   %GFP_HIGHMEM highmem/user allocations,
- *   %GFP_FS	  allocation should not call back into a file system.
- *   %GFP_ATOMIC  don't sleep.
- *
- * @vma:   Pointer to VMA or NULL if not available.
- * @addr:  Virtual Address of the allocation. Must be inside the VMA.
- * @order: Order of the hugepage for gfp allocation.
- *
- * This functions allocate a huge page from the kernel page pool and applies
- * a NUMA policy associated with the VMA or the current process.
- * For policy other than %MPOL_INTERLEAVE, we make sure we allocate hugepage
- * only from the current node if the current node is part of the node mask.
- * If we can't allocate a hugepage we fail the allocation and don' try to fallback
- * to other nodes in the node mask. If the current node is not part of node mask
- * or if the NUMA policy is MPOL_INTERLEAVE we use the allocator that can
- * fallback to nodes in the policy node mask.
- *
- * When VMA is not NULL caller must hold down_read on the mmap_sem of the
- * mm_struct of the VMA to prevent it from going away. Should be used for
- * all allocations for pages that will be mapped into
- * user space. Returns NULL when no page can be allocated.
- *
- * Should be called with vma->vm_mm->mmap_sem held.
- *
- */
-struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma,
-				unsigned long addr, int order)
-{
-	struct page *page;
-	nodemask_t *nmask;
-	struct mempolicy *pol;
-	int node = numa_node_id();
-	unsigned int cpuset_mems_cookie;
-
-retry_cpuset:
-	pol = get_vma_policy(vma, addr);
-	cpuset_mems_cookie = read_mems_allowed_begin();
-	/*
-	 * For interleave policy, we don't worry about
-	 * current node. Otherwise if current node is
-	 * in nodemask, try to allocate hugepage from
-	 * the current node. Don't fall back to other nodes
-	 * for THP.
-	 */
-	if (unlikely(pol->mode == MPOL_INTERLEAVE))
-		goto alloc_with_fallback;
-	nmask = policy_nodemask(gfp, pol);
-	if (!nmask || node_isset(node, *nmask)) {
-		mpol_cond_put(pol);
-		page = alloc_pages_exact_node(node, gfp, order);
-		if (unlikely(!page &&
-			     read_mems_allowed_retry(cpuset_mems_cookie)))
-			goto retry_cpuset;
-		return page;
-	}
-alloc_with_fallback:
-	mpol_cond_put(pol);
-	/*
-	 * if current node is not part of node mask, try
-	 * the allocation from any node, and we can do retry
-	 * in that case.
-	 */
-	return alloc_pages_vma(gfp, order, vma, addr, node);
-}
-#endif
-
 /**
  * 	alloc_pages_current - Allocate pages.
  *