	hugetlb: simplify hugetlb handling in follow_page_mask
During discussions of this series [1], it was suggested that hugetlb
handling code in follow_page_mask could be simplified.  At the beginning
of follow_page_mask, there is currently a call to follow_huge_addr which
'may' handle hugetlb pages.  ia64 is the only architecture whose
follow_huge_addr routine does not simply return an error.  Instead of
relying on follow_huge_addr, a check for a hugetlb entry is made at each
level of the page table.  If a hugetlb entry is found, a routine
associated with that entry is called.
Currently, there are two checks for hugetlb entries at each page table
level.  The first check is of the form:
        if (p?d_huge())
                page = follow_huge_p?d();
the second check is of the form:
        if (is_hugepd())
                page = follow_huge_pd().
We can replace these checks, as well as the special handling routines such
as follow_huge_p?d() and follow_huge_pd(), with a single routine that
handles hugetlb vmas.
A new routine, hugetlb_follow_page_mask, is called for hugetlb vmas at the
beginning of follow_page_mask.  hugetlb_follow_page_mask uses the existing
routine huge_pte_offset to walk the page tables looking for hugetlb
entries.  huge_pte_offset can be overridden by architectures and already
handles special cases such as hugepd entries.
[1] https://lore.kernel.org/linux-mm/cover.1661240170.git.baolin.wang@linux.alibaba.com/
[mike.kravetz@oracle.com: remove vma (pmd sharing) per Peter]
  Link: https://lkml.kernel.org/r/20221028181108.119432-1-mike.kravetz@oracle.com
[mike.kravetz@oracle.com: remove left over hugetlb_vma_unlock_read()]
  Link: https://lkml.kernel.org/r/20221030225825.40872-1-mike.kravetz@oracle.com
Link: https://lkml.kernel.org/r/20220919021348.22151-1-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
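
For quick reference, the resulting call flow can be condensed as follows.
This is only a sketch distilled from the mm/gup.c and mm/hugetlb.c hunks
below (kernel context assumed; locking, reference counting and error
paths are trimmed):

	/* In follow_page_mask(): one hugetlb check replaces the per-level
	 * p?d_huge()/is_hugepd() checks. */
	if (is_vm_hugetlb_page(vma)) {
		page = hugetlb_follow_page_mask(vma, address, flags);
		if (!page)
			page = no_page_table(vma, flags);
		return page;
	}

	/* In hugetlb_follow_page_mask(): the walk goes through the
	 * arch-overridable huge_pte_offset(), which already understands
	 * special cases such as hugepd entries. */
	struct hstate *h = hstate_vma(vma);
	pte_t *pte = huge_pte_offset(vma->vm_mm, address & huge_page_mask(h),
				     huge_page_size(h));
	if (!pte)
		return NULL;
	/* ...then take the pte lock, handle migration/hwpoison entries,
	 * and take a reference on the page with try_grab_page(). */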
			
			
commit 57a196a584 (parent f0c4d9fc9c)
5 changed files with 76 additions and 278 deletions
arch/ia64/mm/hugetlbpage.c

@@ -91,21 +91,6 @@ int prepare_hugepage_range(struct file *file,
 	return 0;
 }
 
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write)
-{
-	struct page *page;
-	pte_t *ptep;
-
-	if (REGION_NUMBER(addr) != RGN_HPAGE)
-		return ERR_PTR(-EINVAL);
-
-	ptep = huge_pte_offset(mm, addr, HPAGE_SIZE);
-	if (!ptep || pte_none(*ptep))
-		return NULL;
-	page = pte_page(*ptep);
-	page += ((addr & ~HPAGE_MASK) >> PAGE_SHIFT);
-	return page;
-}
 int pmd_huge(pmd_t pmd)
 {
 	return 0;
arch/powerpc/mm/hugetlbpage.c

@@ -506,43 +506,6 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	} while (addr = next, addr != end);
 }
 
-struct page *follow_huge_pd(struct vm_area_struct *vma,
-			    unsigned long address, hugepd_t hpd,
-			    int flags, int pdshift)
-{
-	pte_t *ptep;
-	spinlock_t *ptl;
-	struct page *page = NULL;
-	unsigned long mask;
-	int shift = hugepd_shift(hpd);
-	struct mm_struct *mm = vma->vm_mm;
-
-retry:
-	/*
-	 * hugepage directory entries are protected by mm->page_table_lock
-	 * Use this instead of huge_pte_lockptr
-	 */
-	ptl = &mm->page_table_lock;
-	spin_lock(ptl);
-
-	ptep = hugepte_offset(hpd, address, pdshift);
-	if (pte_present(*ptep)) {
-		mask = (1UL << shift) - 1;
-		page = pte_page(*ptep);
-		page += ((address & mask) >> PAGE_SHIFT);
-		if (flags & FOLL_GET)
-			get_page(page);
-	} else {
-		if (is_hugetlb_entry_migration(*ptep)) {
-			spin_unlock(ptl);
-			__migration_entry_wait(mm, ptep, ptl);
-			goto retry;
-		}
-	}
-	spin_unlock(ptl);
-	return page;
-}
-
 bool __init arch_hugetlb_valid_size(unsigned long size)
 {
 	int shift = __ffs(size);
include/linux/hugetlb.h

@@ -149,6 +149,8 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
 			     unsigned long len);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
 			    struct vm_area_struct *, struct vm_area_struct *);
+struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+				unsigned long address, unsigned int flags);
 long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
 			 struct page **, struct vm_area_struct **,
 			 unsigned long *, unsigned long *, long, unsigned int,
@@ -209,17 +211,6 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
 				unsigned long addr, pte_t *ptep);
 void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
 				unsigned long *start, unsigned long *end);
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
-			      int write);
-struct page *follow_huge_pd(struct vm_area_struct *vma,
-			    unsigned long address, hugepd_t hpd,
-			    int flags, int pdshift);
-struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address,
-				 int flags);
-struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
-				pud_t *pud, int flags);
-struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
-			     pgd_t *pgd, int flags);
 
 void hugetlb_vma_lock_read(struct vm_area_struct *vma);
 void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
@@ -272,6 +263,12 @@ static inline void adjust_range_if_pmd_sharing_possible(
 {
 }
 
+static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+				unsigned long address, unsigned int flags)
+{
+	BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/
+}
+
 static inline long follow_hugetlb_page(struct mm_struct *mm,
 			struct vm_area_struct *vma, struct page **pages,
 			struct vm_area_struct **vmas, unsigned long *position,
@@ -282,12 +279,6 @@ static inline long follow_hugetlb_page(struct mm_struct *mm,
 	return 0;
 }
 
-static inline struct page *follow_huge_addr(struct mm_struct *mm,
-					unsigned long address, int write)
-{
-	return ERR_PTR(-EINVAL);
-}
-
 static inline int copy_hugetlb_page_range(struct mm_struct *dst,
 					  struct mm_struct *src,
 					  struct vm_area_struct *dst_vma,
@@ -320,31 +311,6 @@ static inline void hugetlb_show_meminfo_node(int nid)
 {
 }
 
-static inline struct page *follow_huge_pd(struct vm_area_struct *vma,
-				unsigned long address, hugepd_t hpd, int flags,
-				int pdshift)
-{
-	return NULL;
-}
-
-static inline struct page *follow_huge_pmd_pte(struct vm_area_struct *vma,
-				unsigned long address, int flags)
-{
-	return NULL;
-}
-
-static inline struct page *follow_huge_pud(struct mm_struct *mm,
-				unsigned long address, pud_t *pud, int flags)
-{
-	return NULL;
-}
-
-static inline struct page *follow_huge_pgd(struct mm_struct *mm,
-				unsigned long address, pgd_t *pgd, int flags)
-{
-	return NULL;
-}
-
 static inline int prepare_hugepage_range(struct file *file,
 				unsigned long addr, unsigned long len)
 {
							
								
								
									
mm/gup.c (80 changed lines)

@@ -537,18 +537,6 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 	if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
 			 (FOLL_PIN | FOLL_GET)))
 		return ERR_PTR(-EINVAL);
-
-	/*
-	 * Considering PTE level hugetlb, like continuous-PTE hugetlb on
-	 * ARM64 architecture.
-	 */
-	if (is_vm_hugetlb_page(vma)) {
-		page = follow_huge_pmd_pte(vma, address, flags);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
-
 retry:
 	if (unlikely(pmd_bad(*pmd)))
 		return no_page_table(vma, flags);
@@ -680,20 +668,6 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
 	pmdval = READ_ONCE(*pmd);
 	if (pmd_none(pmdval))
 		return no_page_table(vma, flags);
-	if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) {
-		page = follow_huge_pmd_pte(vma, address, flags);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
-	if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
-		page = follow_huge_pd(vma, address,
-				      __hugepd(pmd_val(pmdval)), flags,
-				      PMD_SHIFT);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
 retry:
 	if (!pmd_present(pmdval)) {
 		/*
@@ -783,20 +757,6 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
 	pud = pud_offset(p4dp, address);
 	if (pud_none(*pud))
 		return no_page_table(vma, flags);
-	if (pud_huge(*pud) && is_vm_hugetlb_page(vma)) {
-		page = follow_huge_pud(mm, address, pud, flags);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
-	if (is_hugepd(__hugepd(pud_val(*pud)))) {
-		page = follow_huge_pd(vma, address,
-				      __hugepd(pud_val(*pud)), flags,
-				      PUD_SHIFT);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
 	if (pud_devmap(*pud)) {
 		ptl = pud_lock(mm, pud);
 		page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
@@ -816,7 +776,6 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
 				    struct follow_page_context *ctx)
 {
 	p4d_t *p4d;
-	struct page *page;
 
 	p4d = p4d_offset(pgdp, address);
 	if (p4d_none(*p4d))
@@ -825,14 +784,6 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
 	if (unlikely(p4d_bad(*p4d)))
 		return no_page_table(vma, flags);
 
-	if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
-		page = follow_huge_pd(vma, address,
-				      __hugepd(p4d_val(*p4d)), flags,
-				      P4D_SHIFT);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
 	return follow_pud_mask(vma, address, p4d, flags, ctx);
 }
 
@@ -870,10 +821,18 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
 
 	ctx->page_mask = 0;
 
-	/* make this handle hugepd */
-	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
-	if (!IS_ERR(page)) {
-		WARN_ON_ONCE(flags & (FOLL_GET | FOLL_PIN));
+	/*
+	 * Call hugetlb_follow_page_mask for hugetlb vmas as it will use
+	 * special hugetlb page table walking code.  This eliminates the
+	 * need to check for hugetlb entries in the general walking code.
+	 *
+	 * hugetlb_follow_page_mask is only for follow_page() handling here.
+	 * Ordinary GUP uses follow_hugetlb_page for hugetlb processing.
+	 */
+	if (is_vm_hugetlb_page(vma)) {
+		page = hugetlb_follow_page_mask(vma, address, flags);
+		if (!page)
+			page = no_page_table(vma, flags);
 		return page;
 	}
 
@@ -882,21 +841,6 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
 	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
 		return no_page_table(vma, flags);
 
-	if (pgd_huge(*pgd)) {
-		page = follow_huge_pgd(mm, address, pgd, flags);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
-	if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
-		page = follow_huge_pd(vma, address,
-				      __hugepd(pgd_val(*pgd)), flags,
-				      PGDIR_SHIFT);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
-
 	return follow_p4d_mask(vma, address, pgd, flags, ctx);
 }
 
							
								
								
									
mm/hugetlb.c (172 changed lines)

@@ -6209,6 +6209,62 @@ static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte,
 	return false;
 }
 
+struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+				unsigned long address, unsigned int flags)
+{
+	struct hstate *h = hstate_vma(vma);
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long haddr = address & huge_page_mask(h);
+	struct page *page = NULL;
+	spinlock_t *ptl;
+	pte_t *pte, entry;
+
+	/*
+	 * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via
+	 * follow_hugetlb_page().
+	 */
+	if (WARN_ON_ONCE(flags & FOLL_PIN))
+		return NULL;
+
+retry:
+	pte = huge_pte_offset(mm, haddr, huge_page_size(h));
+	if (!pte)
+		return NULL;
+
+	ptl = huge_pte_lock(h, mm, pte);
+	entry = huge_ptep_get(pte);
+	if (pte_present(entry)) {
+		page = pte_page(entry) +
+				((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
+		/*
+		 * Note that page may be a sub-page, and with vmemmap
+		 * optimizations the page struct may be read only.
+		 * try_grab_page() will increase the ref count on the
+		 * head page, so this will be OK.
+		 *
+		 * try_grab_page() should always succeed here, because we hold
+		 * the ptl lock and have verified pte_present().
+		 */
+		if (WARN_ON_ONCE(!try_grab_page(page, flags))) {
+			page = NULL;
+			goto out;
+		}
+	} else {
+		if (is_hugetlb_entry_migration(entry)) {
+			spin_unlock(ptl);
+			__migration_entry_wait_huge(pte, ptl);
+			goto retry;
+		}
+		/*
+		 * hwpoisoned entry is treated as no_page_table in
+		 * follow_page_mask().
+		 */
+	}
+out:
+	spin_unlock(ptl);
+	return page;
+}
+
 long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			 struct page **pages, struct vm_area_struct **vmas,
 			 unsigned long *position, unsigned long *nr_pages,
@@ -7201,122 +7257,6 @@ __weak unsigned long hugetlb_mask_last_page(struct hstate *h)
  * These functions are overwritable if your architecture needs its own
  * behavior.
  */
-struct page * __weak
-follow_huge_addr(struct mm_struct *mm, unsigned long address,
-			      int write)
-{
-	return ERR_PTR(-EINVAL);
-}
-
-struct page * __weak
-follow_huge_pd(struct vm_area_struct *vma,
-	       unsigned long address, hugepd_t hpd, int flags, int pdshift)
-{
-	WARN(1, "hugepd follow called with no support for hugepage directory format\n");
-	return NULL;
-}
-
-struct page * __weak
-follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags)
-{
-	struct hstate *h = hstate_vma(vma);
-	struct mm_struct *mm = vma->vm_mm;
-	struct page *page = NULL;
-	spinlock_t *ptl;
-	pte_t *ptep, pte;
-
-	/*
-	 * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via
-	 * follow_hugetlb_page().
-	 */
-	if (WARN_ON_ONCE(flags & FOLL_PIN))
-		return NULL;
-
-retry:
-	ptep = huge_pte_offset(mm, address, huge_page_size(h));
-	if (!ptep)
-		return NULL;
-
-	ptl = huge_pte_lock(h, mm, ptep);
-	pte = huge_ptep_get(ptep);
-	if (pte_present(pte)) {
-		page = pte_page(pte) +
-			((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
-		/*
-		 * try_grab_page() should always succeed here, because: a) we
-		 * hold the pmd (ptl) lock, and b) we've just checked that the
-		 * huge pmd (head) page is present in the page tables. The ptl
-		 * prevents the head page and tail pages from being rearranged
-		 * in any way. So this page must be available at this point,
-		 * unless the page refcount overflowed:
-		 */
-		if (WARN_ON_ONCE(!try_grab_page(page, flags))) {
-			page = NULL;
-			goto out;
-		}
-	} else {
-		if (is_hugetlb_entry_migration(pte)) {
-			spin_unlock(ptl);
-			__migration_entry_wait_huge(ptep, ptl);
-			goto retry;
-		}
-		/*
-		 * hwpoisoned entry is treated as no_page_table in
-		 * follow_page_mask().
-		 */
-	}
-out:
-	spin_unlock(ptl);
-	return page;
-}
-
-struct page * __weak
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
-		pud_t *pud, int flags)
-{
-	struct page *page = NULL;
-	spinlock_t *ptl;
-	pte_t pte;
-
-	if (WARN_ON_ONCE(flags & FOLL_PIN))
-		return NULL;
-
-retry:
-	ptl = huge_pte_lock(hstate_sizelog(PUD_SHIFT), mm, (pte_t *)pud);
-	if (!pud_huge(*pud))
-		goto out;
-	pte = huge_ptep_get((pte_t *)pud);
-	if (pte_present(pte)) {
-		page = pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
-		if (WARN_ON_ONCE(!try_grab_page(page, flags))) {
-			page = NULL;
-			goto out;
-		}
-	} else {
-		if (is_hugetlb_entry_migration(pte)) {
-			spin_unlock(ptl);
-			__migration_entry_wait(mm, (pte_t *)pud, ptl);
-			goto retry;
-		}
-		/*
-		 * hwpoisoned entry is treated as no_page_table in
-		 * follow_page_mask().
-		 */
-	}
-out:
-	spin_unlock(ptl);
-	return page;
-}
-
-struct page * __weak
-follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int flags)
-{
-	if (flags & (FOLL_GET | FOLL_PIN))
-		return NULL;
-
-	return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
-}
-
 int isolate_hugetlb(struct page *page, struct list_head *list)
 {
 	int ret = 0;