mm/gup: handle hugetlb in the generic follow_page_mask code
Now follow_page() is ready to handle hugetlb pages in whatever form, on all architectures.  Switch to the generic code path.

Time to retire hugetlb_follow_page_mask(), following the previous retirement of follow_hugetlb_page() in 4849807114.

There may be a slight difference in how the loops run when processing slow GUP over a large hugetlb range on cont_pte/cont_pmd supported archs: with the patch applied, each loop of __get_user_pages() will resolve one pgtable entry, rather than relying on the size of the hugetlb hstate, which may cover multiple entries in one loop.

A quick performance test on an aarch64 VM on an M1 chip shows a 15% degradation over a tight loop of slow gup after the path switch.  That shouldn't be a problem, because slow gup should not be a hot path for GUP in general: when the page is present, fast gup will already succeed; when the page is indeed missing and requires a follow-up page fault, the slow-gup degradation will likely be buried in the fault paths anyway.  It also explains why slow gup for THP used to be very slow before 57edfcfd34 ("mm/gup: accelerate thp gup even for "pages != NULL"") landed; the latter is not part of this performance analysis, just a side benefit.  If performance becomes a concern, we can consider handling CONT_PTE in follow_page().

Until that is justified as necessary, keep everything clean and simple.

Link: https://lkml.kernel.org/r/20240327152332.950956-14-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Ryan Roberts <ryan.roberts@arm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrew Jones <andrew.jones@linux.dev>
Cc: Aneesh Kumar K.V (IBM) <aneesh.kumar@kernel.org>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Mike Rapoport (IBM)" <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Rik van Riel <riel@surriel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
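To make the loop-granularity difference concrete, here is a small standalone sketch (plain userspace C, not kernel code) of the stride arithmetic __get_user_pages() uses: each iteration advances by 1 + (~(addr >> PAGE_SHIFT) & page_mask) base pages. The numbers below assume arm64 with 4KiB base pages and 64KiB cont-PTE hugetlb pages; page_mask = 15 mirrors the (1U << huge_page_order(h)) - 1 reported by the hugetlb_follow_page_mask() removed below, while page_mask = 0 models the generic path resolving one PTE per iteration.

#include <stdio.h>

#define PAGE_SHIFT	12			/* 4KiB base pages (arm64 4K granule assumed) */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

/*
 * Hypothetical stand-in for the stride logic of __get_user_pages():
 * one iteration covers the remainder of the naturally aligned block
 * described by page_mask, i.e. 1 + (~(addr >> PAGE_SHIFT) & page_mask)
 * base pages.
 */
static unsigned long gup_iterations(unsigned long start, unsigned long len,
				    unsigned long page_mask)
{
	unsigned long addr = start, end = start + len, iters = 0;

	while (addr < end) {
		unsigned long page_increm =
			1 + (~(addr >> PAGE_SHIFT) & page_mask);

		addr += page_increm * PAGE_SIZE;
		iters++;
	}
	return iters;
}

int main(void)
{
	unsigned long len = 2UL << 20;	/* 2MiB of 64KiB cont-PTE hugetlb pages */

	/* old hugetlb path: page_mask covers the whole 64KiB hstate (16 pages) */
	printf("hstate-sized steps: %lu iterations\n", gup_iterations(0, len, 15));
	/* generic path: one pgtable (PTE) entry per iteration */
	printf("per-PTE steps:      %lu iterations\n", gup_iterations(0, len, 0));
	return 0;
}

Over a 2MiB range this is 32 iterations before versus 512 after, which is the kind of per-entry overhead behind the 15% slow-gup degradation quoted above.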
parent a12083d721
commit 9cb28da546
3 changed files with 5 additions and 88 deletions
include/linux/hugetlb.h

@@ -328,13 +328,6 @@ static inline void hugetlb_zap_end(
 {
 }
 
-static inline struct page *hugetlb_follow_page_mask(
-		struct vm_area_struct *vma, unsigned long address, unsigned int flags,
-		unsigned int *page_mask)
-{
-	BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/
-}
-
 static inline int copy_hugetlb_page_range(struct mm_struct *dst,
 					  struct mm_struct *src,
 					  struct vm_area_struct *dst_vma,
mm/gup.c (15 changes)
@@ -1132,18 +1132,11 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
 {
 	pgd_t *pgd;
 	struct mm_struct *mm = vma->vm_mm;
 	struct page *page;
 
+	vma_pgtable_walk_begin(vma);
+
 	ctx->page_mask = 0;
 
-	/*
-	 * Call hugetlb_follow_page_mask for hugetlb vmas as it will use
-	 * special hugetlb page table walking code.  This eliminates the
-	 * need to check for hugetlb entries in the general walking code.
-	 */
-	if (is_vm_hugetlb_page(vma))
-		return hugetlb_follow_page_mask(vma, address, flags,
-						&ctx->page_mask);
-
 	pgd = pgd_offset(mm, address);
 
 	if (unlikely(is_hugepd(__hugepd(pgd_val(*pgd)))))
@@ -1154,6 +1147,8 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
 	else
 		page = follow_p4d_mask(vma, address, pgd, flags, ctx);
 
+	vma_pgtable_walk_end(vma);
+
 	return page;
 }
 
mm/hugetlb.c (71 changes)
@@ -6876,77 +6876,6 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 }
 #endif /* CONFIG_USERFAULTFD */
 
-struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
-				      unsigned long address, unsigned int flags,
-				      unsigned int *page_mask)
-{
-	struct hstate *h = hstate_vma(vma);
-	struct mm_struct *mm = vma->vm_mm;
-	unsigned long haddr = address & huge_page_mask(h);
-	struct page *page = NULL;
-	spinlock_t *ptl;
-	pte_t *pte, entry;
-	int ret;
-
-	hugetlb_vma_lock_read(vma);
-	pte = hugetlb_walk(vma, haddr, huge_page_size(h));
-	if (!pte)
-		goto out_unlock;
-
-	ptl = huge_pte_lock(h, mm, pte);
-	entry = huge_ptep_get(pte);
-	if (pte_present(entry)) {
-		page = pte_page(entry);
-
-		if (!huge_pte_write(entry)) {
-			if (flags & FOLL_WRITE) {
-				page = NULL;
-				goto out;
-			}
-
-			if (gup_must_unshare(vma, flags, page)) {
-				/* Tell the caller to do unsharing */
-				page = ERR_PTR(-EMLINK);
-				goto out;
-			}
-		}
-
-		page = nth_page(page, ((address & ~huge_page_mask(h)) >> PAGE_SHIFT));
-
-		/*
-		 * Note that page may be a sub-page, and with vmemmap
-		 * optimizations the page struct may be read only.
-		 * try_grab_page() will increase the ref count on the
-		 * head page, so this will be OK.
-		 *
-		 * try_grab_page() should always be able to get the page here,
-		 * because we hold the ptl lock and have verified pte_present().
-		 */
-		ret = try_grab_page(page, flags);
-
-		if (WARN_ON_ONCE(ret)) {
-			page = ERR_PTR(ret);
-			goto out;
-		}
-
-		*page_mask = (1U << huge_page_order(h)) - 1;
-	}
-out:
-	spin_unlock(ptl);
-out_unlock:
-	hugetlb_vma_unlock_read(vma);
-
-	/*
-	 * Fixup retval for dump requests: if pagecache doesn't exist,
-	 * don't try to allocate a new page but just skip it.
-	 */
-	if (!page && (flags & FOLL_DUMP) &&
-	    !hugetlbfs_pagecache_present(h, vma, address))
-		page = ERR_PTR(-EFAULT);
-
-	return page;
-}
-
 long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end,
 		pgprot_t newprot, unsigned long cp_flags)