mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	userfaultfd: wp: support swap and page migration
For both swap and page migration, we use bit 2 of the entry to identify whether the entry is uffd write-protected. It plays a role similar to the existing soft-dirty bit in swap entries, but is used only to keep the uffd-wp tracking for a specific PTE/PMD. One special point is that when we recover the uffd-wp bit from a swap/migration entry into the PTE bit, we also need to take care of the _PAGE_RW bit and make sure it is cleared; otherwise, even with the _PAGE_UFFD_WP bit set, we cannot trap the write at all. Previously, change_pte_range() did nothing for uffd if the PTE was a swap entry. That can lead to a data mismatch if the page we are going to write-protect is swapped out when UFFDIO_WRITEPROTECT is sent. This patch therefore also applies/removes the uffd-wp bit for swap entries. Signed-off-by: Peter Xu <peterx@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Bobby Powers <bobbypowers@gmail.com> Cc: Brian Geffon <bgeffon@google.com> Cc: David Hildenbrand <david@redhat.com> Cc: Denis Plotnikov <dplotnikov@virtuozzo.com> Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jerome Glisse <jglisse@redhat.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: "Kirill A . Shutemov" <kirill@shutemov.name> Cc: Martin Cracauer <cracauer@cons.org> Cc: Marty McFadden <mcfadden8@llnl.gov> Cc: Maya Gokhale <gokhale2@llnl.gov> Cc: Mel Gorman <mgorman@suse.de> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Mike Rapoport <rppt@linux.vnet.ibm.com> Cc: Pavel Emelyanov <xemul@openvz.org> Cc: Rik van Riel <riel@redhat.com> Cc: Shaohua Li <shli@fb.com> Link: http://lkml.kernel.org/r/20200220163112.11409-11-peterx@redhat.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									2e3d5dc508
								
							
						
					
					
						commit
						f45ec5ff16
					
				
					 6 changed files with 42 additions and 11 deletions
				
			
		| 
						 | 
					@ -68,6 +68,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (pte_swp_soft_dirty(pte))
 | 
						if (pte_swp_soft_dirty(pte))
 | 
				
			||||||
		pte = pte_swp_clear_soft_dirty(pte);
 | 
							pte = pte_swp_clear_soft_dirty(pte);
 | 
				
			||||||
 | 
						if (pte_swp_uffd_wp(pte))
 | 
				
			||||||
 | 
							pte = pte_swp_clear_uffd_wp(pte);
 | 
				
			||||||
	arch_entry = __pte_to_swp_entry(pte);
 | 
						arch_entry = __pte_to_swp_entry(pte);
 | 
				
			||||||
	return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 | 
						return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2297,6 +2297,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 | 
				
			||||||
		write = is_write_migration_entry(entry);
 | 
							write = is_write_migration_entry(entry);
 | 
				
			||||||
		young = false;
 | 
							young = false;
 | 
				
			||||||
		soft_dirty = pmd_swp_soft_dirty(old_pmd);
 | 
							soft_dirty = pmd_swp_soft_dirty(old_pmd);
 | 
				
			||||||
 | 
							uffd_wp = pmd_swp_uffd_wp(old_pmd);
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		page = pmd_page(old_pmd);
 | 
							page = pmd_page(old_pmd);
 | 
				
			||||||
		if (pmd_dirty(old_pmd))
 | 
							if (pmd_dirty(old_pmd))
 | 
				
			||||||
| 
						 | 
					@ -2329,6 +2330,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 | 
				
			||||||
			entry = swp_entry_to_pte(swp_entry);
 | 
								entry = swp_entry_to_pte(swp_entry);
 | 
				
			||||||
			if (soft_dirty)
 | 
								if (soft_dirty)
 | 
				
			||||||
				entry = pte_swp_mksoft_dirty(entry);
 | 
									entry = pte_swp_mksoft_dirty(entry);
 | 
				
			||||||
 | 
								if (uffd_wp)
 | 
				
			||||||
 | 
									entry = pte_swp_mkuffd_wp(entry);
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
 | 
								entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
 | 
				
			||||||
			entry = maybe_mkwrite(entry, vma);
 | 
								entry = maybe_mkwrite(entry, vma);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -733,6 +733,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 | 
				
			||||||
				pte = swp_entry_to_pte(entry);
 | 
									pte = swp_entry_to_pte(entry);
 | 
				
			||||||
				if (pte_swp_soft_dirty(*src_pte))
 | 
									if (pte_swp_soft_dirty(*src_pte))
 | 
				
			||||||
					pte = pte_swp_mksoft_dirty(pte);
 | 
										pte = pte_swp_mksoft_dirty(pte);
 | 
				
			||||||
 | 
									if (pte_swp_uffd_wp(*src_pte))
 | 
				
			||||||
 | 
										pte = pte_swp_mkuffd_wp(pte);
 | 
				
			||||||
				set_pte_at(src_mm, addr, src_pte, pte);
 | 
									set_pte_at(src_mm, addr, src_pte, pte);
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		} else if (is_device_private_entry(entry)) {
 | 
							} else if (is_device_private_entry(entry)) {
 | 
				
			||||||
| 
						 | 
					@ -762,6 +764,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 | 
				
			||||||
			    is_cow_mapping(vm_flags)) {
 | 
								    is_cow_mapping(vm_flags)) {
 | 
				
			||||||
				make_device_private_entry_read(&entry);
 | 
									make_device_private_entry_read(&entry);
 | 
				
			||||||
				pte = swp_entry_to_pte(entry);
 | 
									pte = swp_entry_to_pte(entry);
 | 
				
			||||||
 | 
									if (pte_swp_uffd_wp(*src_pte))
 | 
				
			||||||
 | 
										pte = pte_swp_mkuffd_wp(pte);
 | 
				
			||||||
				set_pte_at(src_mm, addr, src_pte, pte);
 | 
									set_pte_at(src_mm, addr, src_pte, pte);
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					@ -3098,6 +3102,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 | 
				
			||||||
	flush_icache_page(vma, page);
 | 
						flush_icache_page(vma, page);
 | 
				
			||||||
	if (pte_swp_soft_dirty(vmf->orig_pte))
 | 
						if (pte_swp_soft_dirty(vmf->orig_pte))
 | 
				
			||||||
		pte = pte_mksoft_dirty(pte);
 | 
							pte = pte_mksoft_dirty(pte);
 | 
				
			||||||
 | 
						if (pte_swp_uffd_wp(vmf->orig_pte)) {
 | 
				
			||||||
 | 
							pte = pte_mkuffd_wp(pte);
 | 
				
			||||||
 | 
							pte = pte_wrprotect(pte);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
 | 
						set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
 | 
				
			||||||
	arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
 | 
						arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
 | 
				
			||||||
	vmf->orig_pte = pte;
 | 
						vmf->orig_pte = pte;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -243,11 +243,15 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 | 
				
			||||||
		entry = pte_to_swp_entry(*pvmw.pte);
 | 
							entry = pte_to_swp_entry(*pvmw.pte);
 | 
				
			||||||
		if (is_write_migration_entry(entry))
 | 
							if (is_write_migration_entry(entry))
 | 
				
			||||||
			pte = maybe_mkwrite(pte, vma);
 | 
								pte = maybe_mkwrite(pte, vma);
 | 
				
			||||||
 | 
							else if (pte_swp_uffd_wp(*pvmw.pte))
 | 
				
			||||||
 | 
								pte = pte_mkuffd_wp(pte);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (unlikely(is_zone_device_page(new))) {
 | 
							if (unlikely(is_zone_device_page(new))) {
 | 
				
			||||||
			if (is_device_private_page(new)) {
 | 
								if (is_device_private_page(new)) {
 | 
				
			||||||
				entry = make_device_private_entry(new, pte_write(pte));
 | 
									entry = make_device_private_entry(new, pte_write(pte));
 | 
				
			||||||
				pte = swp_entry_to_pte(entry);
 | 
									pte = swp_entry_to_pte(entry);
 | 
				
			||||||
 | 
									if (pte_swp_uffd_wp(*pvmw.pte))
 | 
				
			||||||
 | 
										pte = pte_mkuffd_wp(pte);
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2338,6 +2342,8 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 | 
				
			||||||
			swp_pte = swp_entry_to_pte(entry);
 | 
								swp_pte = swp_entry_to_pte(entry);
 | 
				
			||||||
			if (pte_soft_dirty(pte))
 | 
								if (pte_soft_dirty(pte))
 | 
				
			||||||
				swp_pte = pte_swp_mksoft_dirty(swp_pte);
 | 
									swp_pte = pte_swp_mksoft_dirty(swp_pte);
 | 
				
			||||||
 | 
								if (pte_uffd_wp(pte))
 | 
				
			||||||
 | 
									swp_pte = pte_swp_mkuffd_wp(swp_pte);
 | 
				
			||||||
			set_pte_at(mm, addr, ptep, swp_pte);
 | 
								set_pte_at(mm, addr, ptep, swp_pte);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			/*
 | 
								/*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -139,11 +139,11 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
 | 
								ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
 | 
				
			||||||
			pages++;
 | 
								pages++;
 | 
				
			||||||
		} else if (IS_ENABLED(CONFIG_MIGRATION)) {
 | 
							} else if (is_swap_pte(oldpte)) {
 | 
				
			||||||
			swp_entry_t entry = pte_to_swp_entry(oldpte);
 | 
								swp_entry_t entry = pte_to_swp_entry(oldpte);
 | 
				
			||||||
 | 
								pte_t newpte;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if (is_write_migration_entry(entry)) {
 | 
								if (is_write_migration_entry(entry)) {
 | 
				
			||||||
				pte_t newpte;
 | 
					 | 
				
			||||||
				/*
 | 
									/*
 | 
				
			||||||
				 * A protection check is difficult so
 | 
									 * A protection check is difficult so
 | 
				
			||||||
				 * just be safe and disable write
 | 
									 * just be safe and disable write
 | 
				
			||||||
| 
						 | 
					@ -152,22 +152,28 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 | 
				
			||||||
				newpte = swp_entry_to_pte(entry);
 | 
									newpte = swp_entry_to_pte(entry);
 | 
				
			||||||
				if (pte_swp_soft_dirty(oldpte))
 | 
									if (pte_swp_soft_dirty(oldpte))
 | 
				
			||||||
					newpte = pte_swp_mksoft_dirty(newpte);
 | 
										newpte = pte_swp_mksoft_dirty(newpte);
 | 
				
			||||||
				set_pte_at(vma->vm_mm, addr, pte, newpte);
 | 
									if (pte_swp_uffd_wp(oldpte))
 | 
				
			||||||
 | 
										newpte = pte_swp_mkuffd_wp(newpte);
 | 
				
			||||||
				pages++;
 | 
								} else if (is_write_device_private_entry(entry)) {
 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			if (is_write_device_private_entry(entry)) {
 | 
					 | 
				
			||||||
				pte_t newpte;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
				/*
 | 
									/*
 | 
				
			||||||
				 * We do not preserve soft-dirtiness. See
 | 
									 * We do not preserve soft-dirtiness. See
 | 
				
			||||||
				 * copy_one_pte() for explanation.
 | 
									 * copy_one_pte() for explanation.
 | 
				
			||||||
				 */
 | 
									 */
 | 
				
			||||||
				make_device_private_entry_read(&entry);
 | 
									make_device_private_entry_read(&entry);
 | 
				
			||||||
				newpte = swp_entry_to_pte(entry);
 | 
									newpte = swp_entry_to_pte(entry);
 | 
				
			||||||
				set_pte_at(vma->vm_mm, addr, pte, newpte);
 | 
									if (pte_swp_uffd_wp(oldpte))
 | 
				
			||||||
 | 
										newpte = pte_swp_mkuffd_wp(newpte);
 | 
				
			||||||
 | 
								} else {
 | 
				
			||||||
 | 
									newpte = oldpte;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								if (uffd_wp)
 | 
				
			||||||
 | 
									newpte = pte_swp_mkuffd_wp(newpte);
 | 
				
			||||||
 | 
								else if (uffd_wp_resolve)
 | 
				
			||||||
 | 
									newpte = pte_swp_clear_uffd_wp(newpte);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								if (!pte_same(oldpte, newpte)) {
 | 
				
			||||||
 | 
									set_pte_at(vma->vm_mm, addr, pte, newpte);
 | 
				
			||||||
				pages++;
 | 
									pages++;
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1502,6 +1502,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 | 
				
			||||||
			swp_pte = swp_entry_to_pte(entry);
 | 
								swp_pte = swp_entry_to_pte(entry);
 | 
				
			||||||
			if (pte_soft_dirty(pteval))
 | 
								if (pte_soft_dirty(pteval))
 | 
				
			||||||
				swp_pte = pte_swp_mksoft_dirty(swp_pte);
 | 
									swp_pte = pte_swp_mksoft_dirty(swp_pte);
 | 
				
			||||||
 | 
								if (pte_uffd_wp(pteval))
 | 
				
			||||||
 | 
									swp_pte = pte_swp_mkuffd_wp(swp_pte);
 | 
				
			||||||
			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 | 
								set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 | 
				
			||||||
			/*
 | 
								/*
 | 
				
			||||||
			 * No need to invalidate here it will synchronize on
 | 
								 * No need to invalidate here it will synchronize on
 | 
				
			||||||
| 
						 | 
					@ -1601,6 +1603,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 | 
				
			||||||
			swp_pte = swp_entry_to_pte(entry);
 | 
								swp_pte = swp_entry_to_pte(entry);
 | 
				
			||||||
			if (pte_soft_dirty(pteval))
 | 
								if (pte_soft_dirty(pteval))
 | 
				
			||||||
				swp_pte = pte_swp_mksoft_dirty(swp_pte);
 | 
									swp_pte = pte_swp_mksoft_dirty(swp_pte);
 | 
				
			||||||
 | 
								if (pte_uffd_wp(pteval))
 | 
				
			||||||
 | 
									swp_pte = pte_swp_mkuffd_wp(swp_pte);
 | 
				
			||||||
			set_pte_at(mm, address, pvmw.pte, swp_pte);
 | 
								set_pte_at(mm, address, pvmw.pte, swp_pte);
 | 
				
			||||||
			/*
 | 
								/*
 | 
				
			||||||
			 * No need to invalidate here it will synchronize on
 | 
								 * No need to invalidate here it will synchronize on
 | 
				
			||||||
| 
						 | 
					@ -1667,6 +1671,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 | 
				
			||||||
			swp_pte = swp_entry_to_pte(entry);
 | 
								swp_pte = swp_entry_to_pte(entry);
 | 
				
			||||||
			if (pte_soft_dirty(pteval))
 | 
								if (pte_soft_dirty(pteval))
 | 
				
			||||||
				swp_pte = pte_swp_mksoft_dirty(swp_pte);
 | 
									swp_pte = pte_swp_mksoft_dirty(swp_pte);
 | 
				
			||||||
 | 
								if (pte_uffd_wp(pteval))
 | 
				
			||||||
 | 
									swp_pte = pte_swp_mkuffd_wp(swp_pte);
 | 
				
			||||||
			set_pte_at(mm, address, pvmw.pte, swp_pte);
 | 
								set_pte_at(mm, address, pvmw.pte, swp_pte);
 | 
				
			||||||
			/* Invalidate as we cleared the pte */
 | 
								/* Invalidate as we cleared the pte */
 | 
				
			||||||
			mmu_notifier_invalidate_range(mm, address,
 | 
								mmu_notifier_invalidate_range(mm, address,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue