mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	hugetlb: add hugetlb demote page support
Demote page functionality will split a huge page into a number of huge pages of a smaller size. For example, on x86 a 1GB huge page can be demoted into 512 2M huge pages. Demotion is done 'in place' by simply splitting the huge page. Added '*_for_demote' wrappers for remove_hugetlb_page, destroy_compound_hugetlb_page and prep_compound_gigantic_page for use by demote code. [mike.kravetz@oracle.com: v4] Link: https://lkml.kernel.org/r/6ca29b8e-527c-d6ec-900e-e6a43e4f8b73@oracle.com Link: https://lkml.kernel.org/r/20211007181918.136982-6-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> Reviewed-by: Oscar Salvador <osalvador@suse.de> Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com> Cc: David Hildenbrand <david@redhat.com> Cc: David Rientjes <rientjes@google.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Muchun Song <songmuchun@bytedance.com> Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev> Cc: Nghia Le <nghialm78@gmail.com> Cc: Zi Yan <ziy@nvidia.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									34d9e35b13
								
							
						
					
					
						commit
						8531fc6f52
					
				
					 1 changed files with 92 additions and 8 deletions
				
			
		
							
								
								
									
										100
									
								
								mm/hugetlb.c
									
									
									
									
									
								
							
							
						
						
									
										100
									
								
								mm/hugetlb.c
									
									
									
									
									
								
							| 
						 | 
					@ -1270,7 +1270,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 | 
				
			||||||
		((node = hstate_next_node_to_free(hs, mask)) || 1);	\
 | 
							((node = hstate_next_node_to_free(hs, mask)) || 1);	\
 | 
				
			||||||
		nr_nodes--)
 | 
							nr_nodes--)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
 | 
					/* Also used to demote non-gigantic huge pages */
 | 
				
			||||||
static void __destroy_compound_gigantic_page(struct page *page,
 | 
					static void __destroy_compound_gigantic_page(struct page *page,
 | 
				
			||||||
					unsigned int order, bool demote)
 | 
										unsigned int order, bool demote)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -1293,6 +1293,13 @@ static void __destroy_compound_gigantic_page(struct page *page,
 | 
				
			||||||
	__ClearPageHead(page);
 | 
						__ClearPageHead(page);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void destroy_compound_hugetlb_page_for_demote(struct page *page,
 | 
				
			||||||
 | 
										unsigned int order)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						__destroy_compound_gigantic_page(page, order, true);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
 | 
				
			||||||
static void destroy_compound_gigantic_page(struct page *page,
 | 
					static void destroy_compound_gigantic_page(struct page *page,
 | 
				
			||||||
					unsigned int order)
 | 
										unsigned int order)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -1438,6 +1445,12 @@ static void remove_hugetlb_page(struct hstate *h, struct page *page,
 | 
				
			||||||
	__remove_hugetlb_page(h, page, adjust_surplus, false);
 | 
						__remove_hugetlb_page(h, page, adjust_surplus, false);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page,
 | 
				
			||||||
 | 
												bool adjust_surplus)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						__remove_hugetlb_page(h, page, adjust_surplus, true);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void add_hugetlb_page(struct hstate *h, struct page *page,
 | 
					static void add_hugetlb_page(struct hstate *h, struct page *page,
 | 
				
			||||||
			     bool adjust_surplus)
 | 
								     bool adjust_surplus)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -1779,6 +1792,12 @@ static bool prep_compound_gigantic_page(struct page *page, unsigned int order)
 | 
				
			||||||
	return __prep_compound_gigantic_page(page, order, false);
 | 
						return __prep_compound_gigantic_page(page, order, false);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static bool prep_compound_gigantic_page_for_demote(struct page *page,
 | 
				
			||||||
 | 
												unsigned int order)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return __prep_compound_gigantic_page(page, order, true);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * PageHuge() only returns true for hugetlbfs pages, but not for normal or
 | 
					 * PageHuge() only returns true for hugetlbfs pages, but not for normal or
 | 
				
			||||||
 * transparent huge pages.  See the PageTransHuge() documentation for more
 | 
					 * transparent huge pages.  See the PageTransHuge() documentation for more
 | 
				
			||||||
| 
						 | 
					@ -3304,9 +3323,72 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int demote_free_huge_page(struct hstate *h, struct page *page)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int i, nid = page_to_nid(page);
 | 
				
			||||||
 | 
						struct hstate *target_hstate;
 | 
				
			||||||
 | 
						int rc = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						remove_hugetlb_page_for_demote(h, page, false);
 | 
				
			||||||
 | 
						spin_unlock_irq(&hugetlb_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						rc = alloc_huge_page_vmemmap(h, page);
 | 
				
			||||||
 | 
						if (rc) {
 | 
				
			||||||
 | 
							/* Allocation of vmemmap failed, we cannot demote page */
 | 
				
			||||||
 | 
							spin_lock_irq(&hugetlb_lock);
 | 
				
			||||||
 | 
							set_page_refcounted(page);
 | 
				
			||||||
 | 
							add_hugetlb_page(h, page, false);
 | 
				
			||||||
 | 
							return rc;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Use destroy_compound_hugetlb_page_for_demote for all huge page
 | 
				
			||||||
 | 
						 * sizes as it will not ref count pages.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Taking target hstate mutex synchronizes with set_max_huge_pages.
 | 
				
			||||||
 | 
						 * Without the mutex, pages added to target hstate could be marked
 | 
				
			||||||
 | 
						 * as surplus.
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * Note that we already hold h->resize_lock.  To prevent deadlock,
 | 
				
			||||||
 | 
						 * use the convention of always taking larger size hstate mutex first.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						mutex_lock(&target_hstate->resize_lock);
 | 
				
			||||||
 | 
						for (i = 0; i < pages_per_huge_page(h);
 | 
				
			||||||
 | 
									i += pages_per_huge_page(target_hstate)) {
 | 
				
			||||||
 | 
							if (hstate_is_gigantic(target_hstate))
 | 
				
			||||||
 | 
								prep_compound_gigantic_page_for_demote(page + i,
 | 
				
			||||||
 | 
												target_hstate->order);
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
 | 
								prep_compound_page(page + i, target_hstate->order);
 | 
				
			||||||
 | 
							set_page_private(page + i, 0);
 | 
				
			||||||
 | 
							set_page_refcounted(page + i);
 | 
				
			||||||
 | 
							prep_new_huge_page(target_hstate, page + i, nid);
 | 
				
			||||||
 | 
							put_page(page + i);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						mutex_unlock(&target_hstate->resize_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock_irq(&hugetlb_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Not absolutely necessary, but for consistency update max_huge_pages
 | 
				
			||||||
 | 
						 * based on pool changes for the demoted page.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						h->max_huge_pages--;
 | 
				
			||||||
 | 
						target_hstate->max_huge_pages += pages_per_huge_page(h);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return rc;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
 | 
					static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
 | 
				
			||||||
	__must_hold(&hugetlb_lock)
 | 
						__must_hold(&hugetlb_lock)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						int nr_nodes, node;
 | 
				
			||||||
 | 
						struct page *page;
 | 
				
			||||||
	int rc = 0;
 | 
						int rc = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	lockdep_assert_held(&hugetlb_lock);
 | 
						lockdep_assert_held(&hugetlb_lock);
 | 
				
			||||||
| 
						 | 
					@ -3317,9 +3399,15 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
 | 
				
			||||||
		return -EINVAL;		/* internal error */
 | 
							return -EINVAL;		/* internal error */
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
 | 
				
			||||||
	 * TODO - demote functionality will be added in a subsequent patch
 | 
							if (!list_empty(&h->hugepage_freelists[node])) {
 | 
				
			||||||
	 */
 | 
								page = list_entry(h->hugepage_freelists[node].next,
 | 
				
			||||||
 | 
										struct page, lru);
 | 
				
			||||||
 | 
								rc = demote_free_huge_page(h, page);
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return rc;
 | 
						return rc;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3554,10 +3642,6 @@ static ssize_t demote_store(struct kobject *kobj,
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * Check for available pages to demote each time through the
 | 
							 * Check for available pages to demote each time through the
 | 
				
			||||||
		 * loop as demote_pool_huge_page will drop hugetlb_lock.
 | 
							 * loop as demote_pool_huge_page will drop hugetlb_lock.
 | 
				
			||||||
		 *
 | 
					 | 
				
			||||||
		 * NOTE: demote_pool_huge_page does not yet drop hugetlb_lock
 | 
					 | 
				
			||||||
		 * but will when full demote functionality is added in a later
 | 
					 | 
				
			||||||
		 * patch.
 | 
					 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		if (nid != NUMA_NO_NODE)
 | 
							if (nid != NUMA_NO_NODE)
 | 
				
			||||||
			nr_available = h->free_huge_pages_node[nid];
 | 
								nr_available = h->free_huge_pages_node[nid];
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue