forked from mirrors/linux
		
	hugetlb: do early cow when page pinned on src mm
This is the last missing piece of the COW-during-fork effort when pinned pages are found. One can reference 70e806e4e6 ("mm: Do early cow for pinned pages during fork() for ptes", 2020-09-27) for more information, since we do similar things here, just for hugetlb rather than for ptes this time. Note that after Jason's recent work in 57efa1fe59 ("mm/gup: prevent gup_fast from racing with COW during fork", 2020-12-15), which is safer and easier to understand, we're now safe within the whole copy_page_range() against gup-fast, so we don't need the wr-protect trick that was proposed in 70e806e4e6 anymore. Link: https://lkml.kernel.org/r/20210217233547.93892-6-peterx@redhat.com Signed-off-by: Peter Xu <peterx@redhat.com> Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com> Reviewed-by: Jason Gunthorpe <jgg@ziepe.ca> Cc: Alexey Dobriyan <adobriyan@gmail.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Daniel Vetter <daniel@ffwll.ch> Cc: David Airlie <airlied@linux.ie> Cc: David Gibson <david@gibson.dropbear.id.au> Cc: Gal Pressman <galpress@amazon.com> Cc: Jan Kara <jack@suse.cz> Cc: Jann Horn <jannh@google.com> Cc: Kirill Shutemov <kirill@shutemov.name> Cc: Kirill Tkhai <ktkhai@virtuozzo.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Mike Rapoport <rppt@linux.vnet.ibm.com> Cc: Roland Scheidegger <sroland@vmware.com> Cc: VMware Graphics <linux-graphics-maintainer@vmware.com> Cc: Wei Zhang <wzam@amazon.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									ca6eb14d64
								
							
						
					
					
						commit
						4eae4efa2c
					
				
					 1 changed files with 62 additions and 4 deletions
				
			
		
							
								
								
									
										66
									
								
								mm/hugetlb.c
									
									
									
									
									
								
							
							
						
						
									
										66
									
								
								mm/hugetlb.c
									
									
									
									
									
								
							| 
						 | 
					@ -3728,6 +3728,18 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
 | 
				
			||||||
		return false;
 | 
							return false;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void
 | 
				
			||||||
 | 
					hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
 | 
				
			||||||
 | 
							     struct page *new_page)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						__SetPageUptodate(new_page);
 | 
				
			||||||
 | 
						set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
 | 
				
			||||||
 | 
						hugepage_add_new_anon_rmap(new_page, vma, addr);
 | 
				
			||||||
 | 
						hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
 | 
				
			||||||
 | 
						ClearHPageRestoreReserve(new_page);
 | 
				
			||||||
 | 
						SetHPageMigratable(new_page);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 | 
					int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 | 
				
			||||||
			    struct vm_area_struct *vma)
 | 
								    struct vm_area_struct *vma)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -3737,6 +3749,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 | 
				
			||||||
	bool cow = is_cow_mapping(vma->vm_flags);
 | 
						bool cow = is_cow_mapping(vma->vm_flags);
 | 
				
			||||||
	struct hstate *h = hstate_vma(vma);
 | 
						struct hstate *h = hstate_vma(vma);
 | 
				
			||||||
	unsigned long sz = huge_page_size(h);
 | 
						unsigned long sz = huge_page_size(h);
 | 
				
			||||||
 | 
						unsigned long npages = pages_per_huge_page(h);
 | 
				
			||||||
	struct address_space *mapping = vma->vm_file->f_mapping;
 | 
						struct address_space *mapping = vma->vm_file->f_mapping;
 | 
				
			||||||
	struct mmu_notifier_range range;
 | 
						struct mmu_notifier_range range;
 | 
				
			||||||
	int ret = 0;
 | 
						int ret = 0;
 | 
				
			||||||
| 
						 | 
					@ -3785,6 +3798,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 | 
				
			||||||
		spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 | 
							spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 | 
				
			||||||
		entry = huge_ptep_get(src_pte);
 | 
							entry = huge_ptep_get(src_pte);
 | 
				
			||||||
		dst_entry = huge_ptep_get(dst_pte);
 | 
							dst_entry = huge_ptep_get(dst_pte);
 | 
				
			||||||
 | 
					again:
 | 
				
			||||||
		if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
 | 
							if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
 | 
				
			||||||
			/*
 | 
								/*
 | 
				
			||||||
			 * Skip if src entry none.  Also, skip in the
 | 
								 * Skip if src entry none.  Also, skip in the
 | 
				
			||||||
| 
						 | 
					@ -3808,6 +3822,52 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
 | 
								set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
 | 
								entry = huge_ptep_get(src_pte);
 | 
				
			||||||
 | 
								ptepage = pte_page(entry);
 | 
				
			||||||
 | 
								get_page(ptepage);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								/*
 | 
				
			||||||
 | 
								 * This is a rare case where we see pinned hugetlb
 | 
				
			||||||
 | 
								 * pages while they're prone to COW.  We need to do the
 | 
				
			||||||
 | 
								 * COW earlier during fork.
 | 
				
			||||||
 | 
								 *
 | 
				
			||||||
 | 
								 * When pre-allocating the page or copying data, we
 | 
				
			||||||
 | 
								 * need to be without the pgtable locks since we could
 | 
				
			||||||
 | 
								 * sleep during the process.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								if (unlikely(page_needs_cow_for_dma(vma, ptepage))) {
 | 
				
			||||||
 | 
									pte_t src_pte_old = entry;
 | 
				
			||||||
 | 
									struct page *new;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									spin_unlock(src_ptl);
 | 
				
			||||||
 | 
									spin_unlock(dst_ptl);
 | 
				
			||||||
 | 
									/* Do not use reserve as it's private owned */
 | 
				
			||||||
 | 
									new = alloc_huge_page(vma, addr, 1);
 | 
				
			||||||
 | 
									if (IS_ERR(new)) {
 | 
				
			||||||
 | 
										put_page(ptepage);
 | 
				
			||||||
 | 
										ret = PTR_ERR(new);
 | 
				
			||||||
 | 
										break;
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
									copy_user_huge_page(new, ptepage, addr, vma,
 | 
				
			||||||
 | 
											    npages);
 | 
				
			||||||
 | 
									put_page(ptepage);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									/* Install the new huge page if src pte stable */
 | 
				
			||||||
 | 
									dst_ptl = huge_pte_lock(h, dst, dst_pte);
 | 
				
			||||||
 | 
									src_ptl = huge_pte_lockptr(h, src, src_pte);
 | 
				
			||||||
 | 
									spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 | 
				
			||||||
 | 
									entry = huge_ptep_get(src_pte);
 | 
				
			||||||
 | 
									if (!pte_same(src_pte_old, entry)) {
 | 
				
			||||||
 | 
										put_page(new);
 | 
				
			||||||
 | 
										/* dst_entry won't change as in child */
 | 
				
			||||||
 | 
										goto again;
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
									hugetlb_install_page(vma, dst_pte, addr, new);
 | 
				
			||||||
 | 
									spin_unlock(src_ptl);
 | 
				
			||||||
 | 
									spin_unlock(dst_ptl);
 | 
				
			||||||
 | 
									continue;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if (cow) {
 | 
								if (cow) {
 | 
				
			||||||
				/*
 | 
									/*
 | 
				
			||||||
				 * No need to notify as we are downgrading page
 | 
									 * No need to notify as we are downgrading page
 | 
				
			||||||
| 
						 | 
					@ -3818,12 +3878,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 | 
				
			||||||
				 */
 | 
									 */
 | 
				
			||||||
				huge_ptep_set_wrprotect(src, addr, src_pte);
 | 
									huge_ptep_set_wrprotect(src, addr, src_pte);
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			entry = huge_ptep_get(src_pte);
 | 
					
 | 
				
			||||||
			ptepage = pte_page(entry);
 | 
					 | 
				
			||||||
			get_page(ptepage);
 | 
					 | 
				
			||||||
			page_dup_rmap(ptepage, true);
 | 
								page_dup_rmap(ptepage, true);
 | 
				
			||||||
			set_huge_pte_at(dst, addr, dst_pte, entry);
 | 
								set_huge_pte_at(dst, addr, dst_pte, entry);
 | 
				
			||||||
			hugetlb_count_add(pages_per_huge_page(h), dst);
 | 
								hugetlb_count_add(npages, dst);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		spin_unlock(src_ptl);
 | 
							spin_unlock(src_ptl);
 | 
				
			||||||
		spin_unlock(dst_ptl);
 | 
							spin_unlock(dst_ptl);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue