Mirror of https://github.com/torvalds/linux.git
	hugetlb: do early cow when page pinned on src mm
This is the last missing piece of the COW-during-fork effort for the case where pinned pages are found. One can reference 70e806e4e6 ("mm: Do early cow for pinned pages during fork() for ptes", 2020-09-27) for more information, since we do similar things here, just for hugetlb rather than ptes this time.

Note that after Jason's recent work in 57efa1fe59 ("mm/gup: prevent gup_fast from racing with COW during fork", 2020-12-15), which is safer and easier to understand, the whole of copy_page_range() is now safe against gup-fast, so we no longer need the wr-protect trick proposed in 70e806e4e6.

Link: https://lkml.kernel.org/r/20210217233547.93892-6-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: David Airlie <airlied@linux.ie>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Gal Pressman <galpress@amazon.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Kirill Shutemov <kirill@shutemov.name>
Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Roland Scheidegger <sroland@vmware.com>
Cc: VMware Graphics <linux-graphics-maintainer@vmware.com>
Cc: Wei Zhang <wzam@amazon.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
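For context: the trigger for the early copy is page_needs_cow_for_dma(), which was introduced by an earlier patch in this series, not by this commit. A rough sketch of that helper as it looked at the time of this series (the authoritative version lives in include/linux/mm.h and may differ in detail):

static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
					  struct page *page)
{
	/* Only COW-able private mappings need the early copy */
	if (!is_cow_mapping(vma->vm_flags))
		return false;
	/* Fast path: this mm has never pinned any page */
	if (!atomic_read(&vma->vm_mm->has_pinned))
		return false;
	/* Refcount-based heuristic; can rarely report false positives */
	return page_maybe_dma_pinned(page);
}

A false positive only costs an unnecessary page copy at fork() time, which is why the heuristic is acceptable here.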
parent ca6eb14d64
commit 4eae4efa2c

1 changed file with 62 additions and 4 deletions:

 mm/hugetlb.c | 66 ++++++++++++----
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3728,6 +3728,18 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
 		return false;
 }
 
+static void
+hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
+		     struct page *new_page)
+{
+	__SetPageUptodate(new_page);
+	set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
+	hugepage_add_new_anon_rmap(new_page, vma, addr);
+	hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
+	ClearHPageRestoreReserve(new_page);
+	SetHPageMigratable(new_page);
+}
+
 int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			    struct vm_area_struct *vma)
 {
@@ -3737,6 +3749,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 	bool cow = is_cow_mapping(vma->vm_flags);
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
+	unsigned long npages = pages_per_huge_page(h);
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct mmu_notifier_range range;
 	int ret = 0;
@@ -3785,6 +3798,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 		spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 		entry = huge_ptep_get(src_pte);
 		dst_entry = huge_ptep_get(dst_pte);
+again:
 		if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
 			/*
 			 * Skip if src entry none.  Also, skip in the
@@ -3808,6 +3822,52 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			}
 			set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
 		} else {
+			entry = huge_ptep_get(src_pte);
+			ptepage = pte_page(entry);
+			get_page(ptepage);
+
+			/*
+			 * This is a rare case where we see pinned hugetlb
+			 * pages while they're prone to COW.  We need to do the
+			 * COW earlier during fork.
+			 *
+			 * When pre-allocating the page or copying data, we
+			 * need to be without the pgtable locks since we could
+			 * sleep during the process.
+			 */
+			if (unlikely(page_needs_cow_for_dma(vma, ptepage))) {
+				pte_t src_pte_old = entry;
+				struct page *new;
+
+				spin_unlock(src_ptl);
+				spin_unlock(dst_ptl);
+				/* Do not use reserve as it's private owned */
+				new = alloc_huge_page(vma, addr, 1);
+				if (IS_ERR(new)) {
+					put_page(ptepage);
+					ret = PTR_ERR(new);
+					break;
+				}
+				copy_user_huge_page(new, ptepage, addr, vma,
+						    npages);
+				put_page(ptepage);
+
+				/* Install the new huge page if src pte stable */
+				dst_ptl = huge_pte_lock(h, dst, dst_pte);
+				src_ptl = huge_pte_lockptr(h, src, src_pte);
+				spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+				entry = huge_ptep_get(src_pte);
+				if (!pte_same(src_pte_old, entry)) {
+					put_page(new);
+					/* dst_entry won't change as in child */
+					goto again;
+				}
+				hugetlb_install_page(vma, dst_pte, addr, new);
+				spin_unlock(src_ptl);
+				spin_unlock(dst_ptl);
+				continue;
+			}
+
 			if (cow) {
 				/*
 				 * No need to notify as we are downgrading page
@@ -3818,12 +3878,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 				 */
 				huge_ptep_set_wrprotect(src, addr, src_pte);
 			}
-			entry = huge_ptep_get(src_pte);
-			ptepage = pte_page(entry);
-			get_page(ptepage);
-
 			page_dup_rmap(ptepage, true);
 			set_huge_pte_at(dst, addr, dst_pte, entry);
-			hugetlb_count_add(pages_per_huge_page(h), dst);
+			hugetlb_count_add(npages, dst);
 		}
 		spin_unlock(src_ptl);
 		spin_unlock(dst_ptl);
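To illustrate the scenario this fixes, here is a hypothetical userspace sketch (not part of the commit). io_uring buffer registration long-term pins its buffers via pin_user_pages() (FOLL_PIN | FOLL_LONGTERM), so registering a private hugetlb page and then calling fork() would exercise the new early-copy path. It assumes liburing is installed and that the 2MB hugetlb pool has at least two free pages (one for the parent, one for the child's early copy):

/*
 * Hypothetical demo, not from the commit: pin a private hugetlb page
 * long-term, fork(), and write from both sides.  With this patch the
 * child gets its own copy at fork() time while the parent's pte is
 * left untouched, so the parent never COWs away from the pinned page
 * and DMA into it remains visible through the parent's mapping.
 */
#include <liburing.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

#define HPAGE_SZ	(2UL << 20)	/* one 2MB hugetlb page */

int main(void)
{
	struct io_uring ring;
	struct iovec iov;
	unsigned char *buf;
	int ret;

	buf = mmap(NULL, HPAGE_SZ, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap(MAP_HUGETLB)");
		return 1;
	}
	memset(buf, 0x5a, HPAGE_SZ);	/* fault the huge page in */

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
		return 1;
	}
	iov.iov_base = buf;
	iov.iov_len = HPAGE_SZ;
	/* Long-term pin of the hugetlb page in this mm */
	ret = io_uring_register_buffers(&ring, &iov, 1);
	if (ret < 0) {
		fprintf(stderr, "register_buffers: %s\n", strerror(-ret));
		return 1;
	}

	/* copy_hugetlb_page_range() now sees a pinned page and copies
	 * it for the child up front */
	if (fork() == 0) {
		buf[0] = 0xa5;	/* child writes its own early copy */
		_exit(0);
	}
	wait(NULL);
	buf[1] = 0x11;	/* parent write: no COW, page stays pinned and mapped */
	printf("parent: buf[0]=0x%x buf[1]=0x%x\n", buf[0], buf[1]);
	io_uring_queue_exit(&ring);
	return 0;
}

The goto again retry in the patch is the usual optimistic pattern for this kind of code: the page table locks must be dropped to allocate and copy (both can sleep), so after relocking the src pte is re-read and, if it changed in the meantime, the freshly copied page is discarded and the iteration restarts.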