mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	userfaultfd: hugetlbfs: add hugetlb_mcopy_atomic_pte for userfaultfd support
hugetlb_mcopy_atomic_pte is the low-level routine that implements the
userfaultfd UFFDIO_COPY command. It is based on the existing
mcopy_atomic_pte routine, with modifications for huge pages.

Link: http://lkml.kernel.org/r/20161216144821.5183-18-aarcange@redhat.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									fa4d75c1de
								
							
						
					
					
						commit
						8fb5debc5f
					
				
					 2 changed files with 88 additions and 0 deletions
				
			
		| 
						 | 
					@ -81,6 +81,11 @@ void hugetlb_show_meminfo(void);
 | 
				
			||||||
unsigned long hugetlb_total_pages(void);
 | 
					unsigned long hugetlb_total_pages(void);
 | 
				
			||||||
int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
					int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
				
			||||||
			unsigned long address, unsigned int flags);
 | 
								unsigned long address, unsigned int flags);
 | 
				
			||||||
 | 
					int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
 | 
				
			||||||
 | 
									struct vm_area_struct *dst_vma,
 | 
				
			||||||
 | 
									unsigned long dst_addr,
 | 
				
			||||||
 | 
									unsigned long src_addr,
 | 
				
			||||||
 | 
									struct page **pagep);
 | 
				
			||||||
int hugetlb_reserve_pages(struct inode *inode, long from, long to,
 | 
					int hugetlb_reserve_pages(struct inode *inode, long from, long to,
 | 
				
			||||||
						struct vm_area_struct *vma,
 | 
											struct vm_area_struct *vma,
 | 
				
			||||||
						vm_flags_t vm_flags);
 | 
											vm_flags_t vm_flags);
 | 
				
			||||||
| 
						 | 
					@ -149,6 +154,8 @@ static inline void hugetlb_show_meminfo(void)
 | 
				
			||||||
#define is_hugepage_only_range(mm, addr, len)	0
 | 
					#define is_hugepage_only_range(mm, addr, len)	0
 | 
				
			||||||
#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
 | 
					#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
 | 
				
			||||||
#define hugetlb_fault(mm, vma, addr, flags)	({ BUG(); 0; })
 | 
					#define hugetlb_fault(mm, vma, addr, flags)	({ BUG(); 0; })
 | 
				
			||||||
 | 
					#define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
 | 
				
			||||||
 | 
									src_addr, pagep)	({ BUG(); 0; })
 | 
				
			||||||
#define huge_pte_offset(mm, address)	0
 | 
					#define huge_pte_offset(mm, address)	0
 | 
				
			||||||
static inline int dequeue_hwpoisoned_huge_page(struct page *page)
 | 
					static inline int dequeue_hwpoisoned_huge_page(struct page *page)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										81
									
								
								mm/hugetlb.c
									
									
									
									
									
								
							
							
						
						
									
										81
									
								
								mm/hugetlb.c
									
									
									
									
									
								
							| 
						 | 
					@ -3948,6 +3948,87 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
				
			||||||
	return ret;
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Used by userfaultfd UFFDIO_COPY.  Based on mcopy_atomic_pte with
 | 
				
			||||||
 | 
					 * modifications for huge pages.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 | 
				
			||||||
 | 
								    pte_t *dst_pte,
 | 
				
			||||||
 | 
								    struct vm_area_struct *dst_vma,
 | 
				
			||||||
 | 
								    unsigned long dst_addr,
 | 
				
			||||||
 | 
								    unsigned long src_addr,
 | 
				
			||||||
 | 
								    struct page **pagep)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct hstate *h = hstate_vma(dst_vma);
 | 
				
			||||||
 | 
						pte_t _dst_pte;
 | 
				
			||||||
 | 
						spinlock_t *ptl;
 | 
				
			||||||
 | 
						int ret;
 | 
				
			||||||
 | 
						struct page *page;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!*pagep) {
 | 
				
			||||||
 | 
							ret = -ENOMEM;
 | 
				
			||||||
 | 
							page = alloc_huge_page(dst_vma, dst_addr, 0);
 | 
				
			||||||
 | 
							if (IS_ERR(page))
 | 
				
			||||||
 | 
								goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							ret = copy_huge_page_from_user(page,
 | 
				
			||||||
 | 
											(const void __user *) src_addr,
 | 
				
			||||||
 | 
											pages_per_huge_page(h));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* fallback to copy_from_user outside mmap_sem */
 | 
				
			||||||
 | 
							if (unlikely(ret)) {
 | 
				
			||||||
 | 
								ret = -EFAULT;
 | 
				
			||||||
 | 
								*pagep = page;
 | 
				
			||||||
 | 
								/* don't free the page */
 | 
				
			||||||
 | 
								goto out;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							page = *pagep;
 | 
				
			||||||
 | 
							*pagep = NULL;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * The memory barrier inside __SetPageUptodate makes sure that
 | 
				
			||||||
 | 
						 * preceding stores to the page contents become visible before
 | 
				
			||||||
 | 
						 * the set_pte_at() write.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						__SetPageUptodate(page);
 | 
				
			||||||
 | 
						set_page_huge_active(page);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
 | 
				
			||||||
 | 
						spin_lock(ptl);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ret = -EEXIST;
 | 
				
			||||||
 | 
						if (!huge_pte_none(huge_ptep_get(dst_pte)))
 | 
				
			||||||
 | 
							goto out_release_unlock;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ClearPagePrivate(page);
 | 
				
			||||||
 | 
						hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						_dst_pte = make_huge_pte(dst_vma, page, dst_vma->vm_flags & VM_WRITE);
 | 
				
			||||||
 | 
						if (dst_vma->vm_flags & VM_WRITE)
 | 
				
			||||||
 | 
							_dst_pte = huge_pte_mkdirty(_dst_pte);
 | 
				
			||||||
 | 
						_dst_pte = pte_mkyoung(_dst_pte);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						(void)huge_ptep_set_access_flags(dst_vma, dst_addr, dst_pte, _dst_pte,
 | 
				
			||||||
 | 
										dst_vma->vm_flags & VM_WRITE);
 | 
				
			||||||
 | 
						hugetlb_count_add(pages_per_huge_page(h), dst_mm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* No need to invalidate - it was non-present before */
 | 
				
			||||||
 | 
						update_mmu_cache(dst_vma, dst_addr, dst_pte);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_unlock(ptl);
 | 
				
			||||||
 | 
						ret = 0;
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						return ret;
 | 
				
			||||||
 | 
					out_release_unlock:
 | 
				
			||||||
 | 
						spin_unlock(ptl);
 | 
				
			||||||
 | 
						put_page(page);
 | 
				
			||||||
 | 
						goto out;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
					long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
				
			||||||
			 struct page **pages, struct vm_area_struct **vmas,
 | 
								 struct page **pages, struct vm_area_struct **vmas,
 | 
				
			||||||
			 unsigned long *position, unsigned long *nr_pages,
 | 
								 unsigned long *position, unsigned long *nr_pages,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue