forked from mirrors/linux
		
	mm/hugetlb: handle uffd-wp during fork()
Firstly, we need to pass dst_vma into copy_hugetlb_page_range(), because for uffd-wp it is the dst vma that matters when deciding how to treat uffd-wp protected ptes.

We should also recognize pte markers during fork and copy the pte if needed.

[lkp@intel.com: vma_needs_copy can be static]
  Link: https://lkml.kernel.org/r/Ylb0CGeFJlc4EzLk@7ec4ff11d4ae
Link: https://lkml.kernel.org/r/20220405014918.14932-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									05e90bd05e
								
							
						
					
					
						commit
						bc70fbf269
					
				
					 3 changed files with 35 additions and 18 deletions
				
			
		| 
						 | 
					@ -137,7 +137,8 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
 | 
				
			||||||
			     struct vm_area_struct *new_vma,
 | 
								     struct vm_area_struct *new_vma,
 | 
				
			||||||
			     unsigned long old_addr, unsigned long new_addr,
 | 
								     unsigned long old_addr, unsigned long new_addr,
 | 
				
			||||||
			     unsigned long len);
 | 
								     unsigned long len);
 | 
				
			||||||
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
 | 
					int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
 | 
				
			||||||
 | 
								    struct vm_area_struct *, struct vm_area_struct *);
 | 
				
			||||||
long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
 | 
					long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
 | 
				
			||||||
			 struct page **, struct vm_area_struct **,
 | 
								 struct page **, struct vm_area_struct **,
 | 
				
			||||||
			 unsigned long *, unsigned long *, long, unsigned int,
 | 
								 unsigned long *, unsigned long *, long, unsigned int,
 | 
				
			||||||
| 
						 | 
					@ -269,7 +270,9 @@ static inline struct page *follow_huge_addr(struct mm_struct *mm,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline int copy_hugetlb_page_range(struct mm_struct *dst,
 | 
					static inline int copy_hugetlb_page_range(struct mm_struct *dst,
 | 
				
			||||||
			struct mm_struct *src, struct vm_area_struct *vma)
 | 
										  struct mm_struct *src,
 | 
				
			||||||
 | 
										  struct vm_area_struct *dst_vma,
 | 
				
			||||||
 | 
										  struct vm_area_struct *src_vma)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	BUG();
 | 
						BUG();
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										42
									
								
								mm/hugetlb.c
									
									
									
									
									
								
							
							
						
						
									
										42
									
								
								mm/hugetlb.c
									
									
									
									
									
								
							| 
						 | 
					@ -4719,23 +4719,24 @@ hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 | 
					int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 | 
				
			||||||
			    struct vm_area_struct *vma)
 | 
								    struct vm_area_struct *dst_vma,
 | 
				
			||||||
 | 
								    struct vm_area_struct *src_vma)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	pte_t *src_pte, *dst_pte, entry, dst_entry;
 | 
						pte_t *src_pte, *dst_pte, entry, dst_entry;
 | 
				
			||||||
	struct page *ptepage;
 | 
						struct page *ptepage;
 | 
				
			||||||
	unsigned long addr;
 | 
						unsigned long addr;
 | 
				
			||||||
	bool cow = is_cow_mapping(vma->vm_flags);
 | 
						bool cow = is_cow_mapping(src_vma->vm_flags);
 | 
				
			||||||
	struct hstate *h = hstate_vma(vma);
 | 
						struct hstate *h = hstate_vma(src_vma);
 | 
				
			||||||
	unsigned long sz = huge_page_size(h);
 | 
						unsigned long sz = huge_page_size(h);
 | 
				
			||||||
	unsigned long npages = pages_per_huge_page(h);
 | 
						unsigned long npages = pages_per_huge_page(h);
 | 
				
			||||||
	struct address_space *mapping = vma->vm_file->f_mapping;
 | 
						struct address_space *mapping = src_vma->vm_file->f_mapping;
 | 
				
			||||||
	struct mmu_notifier_range range;
 | 
						struct mmu_notifier_range range;
 | 
				
			||||||
	int ret = 0;
 | 
						int ret = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (cow) {
 | 
						if (cow) {
 | 
				
			||||||
		mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, src,
 | 
							mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, src_vma, src,
 | 
				
			||||||
					vma->vm_start,
 | 
										src_vma->vm_start,
 | 
				
			||||||
					vma->vm_end);
 | 
										src_vma->vm_end);
 | 
				
			||||||
		mmu_notifier_invalidate_range_start(&range);
 | 
							mmu_notifier_invalidate_range_start(&range);
 | 
				
			||||||
		mmap_assert_write_locked(src);
 | 
							mmap_assert_write_locked(src);
 | 
				
			||||||
		raw_write_seqcount_begin(&src->write_protect_seq);
 | 
							raw_write_seqcount_begin(&src->write_protect_seq);
 | 
				
			||||||
| 
						 | 
					@ -4749,12 +4750,12 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 | 
				
			||||||
		i_mmap_lock_read(mapping);
 | 
							i_mmap_lock_read(mapping);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
 | 
						for (addr = src_vma->vm_start; addr < src_vma->vm_end; addr += sz) {
 | 
				
			||||||
		spinlock_t *src_ptl, *dst_ptl;
 | 
							spinlock_t *src_ptl, *dst_ptl;
 | 
				
			||||||
		src_pte = huge_pte_offset(src, addr, sz);
 | 
							src_pte = huge_pte_offset(src, addr, sz);
 | 
				
			||||||
		if (!src_pte)
 | 
							if (!src_pte)
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
		dst_pte = huge_pte_alloc(dst, vma, addr, sz);
 | 
							dst_pte = huge_pte_alloc(dst, dst_vma, addr, sz);
 | 
				
			||||||
		if (!dst_pte) {
 | 
							if (!dst_pte) {
 | 
				
			||||||
			ret = -ENOMEM;
 | 
								ret = -ENOMEM;
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
| 
						 | 
					@ -4789,6 +4790,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 | 
				
			||||||
		} else if (unlikely(is_hugetlb_entry_migration(entry) ||
 | 
							} else if (unlikely(is_hugetlb_entry_migration(entry) ||
 | 
				
			||||||
				    is_hugetlb_entry_hwpoisoned(entry))) {
 | 
									    is_hugetlb_entry_hwpoisoned(entry))) {
 | 
				
			||||||
			swp_entry_t swp_entry = pte_to_swp_entry(entry);
 | 
								swp_entry_t swp_entry = pte_to_swp_entry(entry);
 | 
				
			||||||
 | 
								bool uffd_wp = huge_pte_uffd_wp(entry);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if (!is_readable_migration_entry(swp_entry) && cow) {
 | 
								if (!is_readable_migration_entry(swp_entry) && cow) {
 | 
				
			||||||
				/*
 | 
									/*
 | 
				
			||||||
| 
						 | 
					@ -4798,10 +4800,21 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 | 
				
			||||||
				swp_entry = make_readable_migration_entry(
 | 
									swp_entry = make_readable_migration_entry(
 | 
				
			||||||
							swp_offset(swp_entry));
 | 
												swp_offset(swp_entry));
 | 
				
			||||||
				entry = swp_entry_to_pte(swp_entry);
 | 
									entry = swp_entry_to_pte(swp_entry);
 | 
				
			||||||
 | 
									if (userfaultfd_wp(src_vma) && uffd_wp)
 | 
				
			||||||
 | 
										entry = huge_pte_mkuffd_wp(entry);
 | 
				
			||||||
				set_huge_swap_pte_at(src, addr, src_pte,
 | 
									set_huge_swap_pte_at(src, addr, src_pte,
 | 
				
			||||||
						     entry, sz);
 | 
											     entry, sz);
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
								if (!userfaultfd_wp(dst_vma) && uffd_wp)
 | 
				
			||||||
 | 
									entry = huge_pte_clear_uffd_wp(entry);
 | 
				
			||||||
			set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
 | 
								set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
 | 
				
			||||||
 | 
							} else if (unlikely(is_pte_marker(entry))) {
 | 
				
			||||||
 | 
								/*
 | 
				
			||||||
 | 
								 * We copy the pte marker only if the dst vma has
 | 
				
			||||||
 | 
								 * uffd-wp enabled.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								if (userfaultfd_wp(dst_vma))
 | 
				
			||||||
 | 
									set_huge_pte_at(dst, addr, dst_pte, entry);
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			entry = huge_ptep_get(src_pte);
 | 
								entry = huge_ptep_get(src_pte);
 | 
				
			||||||
			ptepage = pte_page(entry);
 | 
								ptepage = pte_page(entry);
 | 
				
			||||||
| 
						 | 
					@ -4819,20 +4832,21 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 | 
				
			||||||
			 */
 | 
								 */
 | 
				
			||||||
			if (!PageAnon(ptepage)) {
 | 
								if (!PageAnon(ptepage)) {
 | 
				
			||||||
				page_dup_file_rmap(ptepage, true);
 | 
									page_dup_file_rmap(ptepage, true);
 | 
				
			||||||
			} else if (page_try_dup_anon_rmap(ptepage, true, vma)) {
 | 
								} else if (page_try_dup_anon_rmap(ptepage, true,
 | 
				
			||||||
 | 
												  src_vma)) {
 | 
				
			||||||
				pte_t src_pte_old = entry;
 | 
									pte_t src_pte_old = entry;
 | 
				
			||||||
				struct page *new;
 | 
									struct page *new;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				spin_unlock(src_ptl);
 | 
									spin_unlock(src_ptl);
 | 
				
			||||||
				spin_unlock(dst_ptl);
 | 
									spin_unlock(dst_ptl);
 | 
				
			||||||
				/* Do not use reserve as it's private owned */
 | 
									/* Do not use reserve as it's private owned */
 | 
				
			||||||
				new = alloc_huge_page(vma, addr, 1);
 | 
									new = alloc_huge_page(dst_vma, addr, 1);
 | 
				
			||||||
				if (IS_ERR(new)) {
 | 
									if (IS_ERR(new)) {
 | 
				
			||||||
					put_page(ptepage);
 | 
										put_page(ptepage);
 | 
				
			||||||
					ret = PTR_ERR(new);
 | 
										ret = PTR_ERR(new);
 | 
				
			||||||
					break;
 | 
										break;
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
				copy_user_huge_page(new, ptepage, addr, vma,
 | 
									copy_user_huge_page(new, ptepage, addr, dst_vma,
 | 
				
			||||||
						    npages);
 | 
											    npages);
 | 
				
			||||||
				put_page(ptepage);
 | 
									put_page(ptepage);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4842,13 +4856,13 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 | 
				
			||||||
				spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 | 
									spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 | 
				
			||||||
				entry = huge_ptep_get(src_pte);
 | 
									entry = huge_ptep_get(src_pte);
 | 
				
			||||||
				if (!pte_same(src_pte_old, entry)) {
 | 
									if (!pte_same(src_pte_old, entry)) {
 | 
				
			||||||
					restore_reserve_on_error(h, vma, addr,
 | 
										restore_reserve_on_error(h, dst_vma, addr,
 | 
				
			||||||
								new);
 | 
													new);
 | 
				
			||||||
					put_page(new);
 | 
										put_page(new);
 | 
				
			||||||
					/* dst_entry won't change as in child */
 | 
										/* dst_entry won't change as in child */
 | 
				
			||||||
					goto again;
 | 
										goto again;
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
				hugetlb_install_page(vma, dst_pte, addr, new);
 | 
									hugetlb_install_page(dst_vma, dst_pte, addr, new);
 | 
				
			||||||
				spin_unlock(src_ptl);
 | 
									spin_unlock(src_ptl);
 | 
				
			||||||
				spin_unlock(dst_ptl);
 | 
									spin_unlock(dst_ptl);
 | 
				
			||||||
				continue;
 | 
									continue;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1234,7 +1234,7 @@ copy_p4d_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 | 
				
			||||||
 * false when we can speed up fork() by allowing lazy page faults later until
 | 
					 * false when we can speed up fork() by allowing lazy page faults later until
 | 
				
			||||||
 * when the child accesses the memory range.
 | 
					 * when the child accesses the memory range.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
bool
 | 
					static bool
 | 
				
			||||||
vma_needs_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
 | 
					vma_needs_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
| 
						 | 
					@ -1278,7 +1278,7 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (is_vm_hugetlb_page(src_vma))
 | 
						if (is_vm_hugetlb_page(src_vma))
 | 
				
			||||||
		return copy_hugetlb_page_range(dst_mm, src_mm, src_vma);
 | 
							return copy_hugetlb_page_range(dst_mm, src_mm, dst_vma, src_vma);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (unlikely(src_vma->vm_flags & VM_PFNMAP)) {
 | 
						if (unlikely(src_vma->vm_flags & VM_PFNMAP)) {
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue