forked from mirrors/linux

mm: introduce fault_env

The idea borrowed from Peter's patch from patchset on speculative page
faults[1]:

Instead of passing around the endless list of function arguments,
replace the lot with a single structure so we can change context
without endless function signature changes.

The changes are mostly mechanical with exception of faultaround code:
filemap_map_pages() got reworked a bit.

This patch is preparation for the next one.

[1] http://lkml.kernel.org/r/20141020222841.302891540@infradead.org

Link: http://lkml.kernel.org/r/1466021202-61880-9-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

commit: bae473a423
parent: dcddffd41d

10 changed files with 474 additions and 515 deletions
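The new calling convention can be seen in the __collapse_huge_page_swapin() hunk in mm/huge_memory.c below: the caller builds a struct fault_env on the stack and passes a pointer to it instead of the old (mm, vma, address, pte, pmd, flags, ...) argument list. Condensed from that hunk (not standalone code):

	struct fault_env fe = {
		.vma = vma,
		.address = address,
		.flags = FAULT_FLAG_ALLOW_RETRY,
		.pmd = pmd,
	};

	fe.pte = pte_offset_map(pmd, address);
	/* ... */
	ret = do_swap_page(&fe, pteval);	/* was: do_swap_page(mm, vma, _address, pte, pmd, flags, pteval) */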
				
			
Documentation/filesystems/Locking
@@ -548,13 +548,13 @@ subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
 locked. The VM will unlock the page.
 
 	->map_pages() is called when VM asks to map easy accessible pages.
-Filesystem should find and map pages associated with offsets from "pgoff"
-till "max_pgoff". ->map_pages() is called with page table locked and must
+Filesystem should find and map pages associated with offsets from "start_pgoff"
+till "end_pgoff". ->map_pages() is called with page table locked and must
 not block.  If it's not possible to reach a page without blocking,
 filesystem should skip it. Filesystem should use do_set_pte() to setup
-page table entry. Pointer to entry associated with offset "pgoff" is
-passed in "pte" field in vm_fault structure. Pointers to entries for other
-offsets should be calculated relative to "pte".
+page table entry. Pointer to entry associated with the page is passed in
+"pte" field in fault_env structure. Pointers to entries for other offsets
+should be calculated relative to "pte".
 
 	->page_mkwrite() is called when a previously read-only pte is
 about to become writeable. The filesystem again must ensure that there are
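For illustration only (not part of the patch): a minimal ->map_pages() under the new interface described above might look like the sketch below. It assumes a hypothetical helper my_find_get_page() that returns a referenced, uptodate page-cache page; readahead accounting and most error handling are elided. The pte pointer and address in the fault_env are stepped relative to the previously handled offset, and do_set_pte() installs the entry.

	static void myfs_map_pages(struct fault_env *fe,
			pgoff_t start_pgoff, pgoff_t end_pgoff)
	{
		pgoff_t last_pgoff = start_pgoff;
		pgoff_t pgoff;
		struct page *page;

		for (pgoff = start_pgoff; pgoff <= end_pgoff; pgoff++) {
			/* hypothetical lookup that takes a page reference */
			page = my_find_get_page(fe->vma->vm_file->f_mapping, pgoff);
			if (!page)
				continue;
			if (!trylock_page(page)) {	/* must not block */
				put_page(page);
				continue;
			}
			/* step pte pointer and address relative to the last offset */
			fe->pte += pgoff - last_pgoff;
			fe->address += (pgoff - last_pgoff) << PAGE_SHIFT;
			last_pgoff = pgoff;
			if (!PageUptodate(page) || !pte_none(*fe->pte)) {
				unlock_page(page);
				put_page(page);
				continue;
			}
			do_set_pte(fe, page);	/* reference now held by the mapping */
			unlock_page(page);
		}
	}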
fs/userfaultfd.c
@@ -257,10 +257,9 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
  * fatal_signal_pending()s, and the mmap_sem must be released before
  * returning it.
  */
-int handle_userfault(struct vm_area_struct *vma, unsigned long address,
-		     unsigned int flags, unsigned long reason)
+int handle_userfault(struct fault_env *fe, unsigned long reason)
 {
-	struct mm_struct *mm = vma->vm_mm;
+	struct mm_struct *mm = fe->vma->vm_mm;
 	struct userfaultfd_ctx *ctx;
 	struct userfaultfd_wait_queue uwq;
 	int ret;
@@ -269,7 +268,7 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 	BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
 	ret = VM_FAULT_SIGBUS;
-	ctx = vma->vm_userfaultfd_ctx.ctx;
+	ctx = fe->vma->vm_userfaultfd_ctx.ctx;
 	if (!ctx)
 		goto out;
 
@@ -302,17 +301,17 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 	 * without first stopping userland access to the memory. For
 	 * VM_UFFD_MISSING userfaults this is enough for now.
 	 */
-	if (unlikely(!(flags & FAULT_FLAG_ALLOW_RETRY))) {
+	if (unlikely(!(fe->flags & FAULT_FLAG_ALLOW_RETRY))) {
 		/*
 		 * Validate the invariant that nowait must allow retry
 		 * to be sure not to return SIGBUS erroneously on
 		 * nowait invocations.
 		 */
-		BUG_ON(flags & FAULT_FLAG_RETRY_NOWAIT);
+		BUG_ON(fe->flags & FAULT_FLAG_RETRY_NOWAIT);
 #ifdef CONFIG_DEBUG_VM
 		if (printk_ratelimit()) {
 			printk(KERN_WARNING
-			       "FAULT_FLAG_ALLOW_RETRY missing %x\n", flags);
+			       "FAULT_FLAG_ALLOW_RETRY missing %x\n", fe->flags);
 			dump_stack();
 		}
 #endif
@@ -324,7 +323,7 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 	 * and wait.
 	 */
 	ret = VM_FAULT_RETRY;
-	if (flags & FAULT_FLAG_RETRY_NOWAIT)
+	if (fe->flags & FAULT_FLAG_RETRY_NOWAIT)
 		goto out;
 
 	/* take the reference before dropping the mmap_sem */
@@ -332,10 +331,11 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 
 	init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
 	uwq.wq.private = current;
-	uwq.msg = userfault_msg(address, flags, reason);
+	uwq.msg = userfault_msg(fe->address, fe->flags, reason);
 	uwq.ctx = ctx;
 
-	return_to_userland = (flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
+	return_to_userland =
+		(fe->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
 		(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
 
 	spin_lock(&ctx->fault_pending_wqh.lock);
@@ -353,7 +353,7 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 			  TASK_KILLABLE);
 	spin_unlock(&ctx->fault_pending_wqh.lock);
 
-	must_wait = userfaultfd_must_wait(ctx, address, flags, reason);
+	must_wait = userfaultfd_must_wait(ctx, fe->address, fe->flags, reason);
 	up_read(&mm->mmap_sem);
 
 	if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&
include/linux/huge_mm.h
@@ -1,20 +1,12 @@
 #ifndef _LINUX_HUGE_MM_H
 #define _LINUX_HUGE_MM_H
 
-extern int do_huge_pmd_anonymous_page(struct mm_struct *mm,
-				      struct vm_area_struct *vma,
-				      unsigned long address, pmd_t *pmd,
-				      unsigned int flags);
+extern int do_huge_pmd_anonymous_page(struct fault_env *fe);
 extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
 			 struct vm_area_struct *vma);
-extern void huge_pmd_set_accessed(struct mm_struct *mm,
-				  struct vm_area_struct *vma,
-				  unsigned long address, pmd_t *pmd,
-				  pmd_t orig_pmd, int dirty);
-extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			       unsigned long address, pmd_t *pmd,
-			       pmd_t orig_pmd);
+extern void huge_pmd_set_accessed(struct fault_env *fe, pmd_t orig_pmd);
+extern int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd);
 extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 					  unsigned long addr,
 					  pmd_t *pmd,
@@ -134,8 +126,7 @@ static inline int hpage_nr_pages(struct page *page)
 	return 1;
 }
 
-extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-				unsigned long addr, pmd_t pmd, pmd_t *pmdp);
+extern int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t orig_pmd);
 
 extern struct page *huge_zero_page;
 
@@ -196,8 +187,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
 	return NULL;
 }
 
-static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-					unsigned long addr, pmd_t pmd, pmd_t *pmdp)
+static inline int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t orig_pmd)
 {
 	return 0;
 }
include/linux/mm.h
@@ -309,10 +309,27 @@ struct vm_fault {
 					 * VM_FAULT_DAX_LOCKED and fill in
 					 * entry here.
 					 */
-	/* for ->map_pages() only */
-	pgoff_t max_pgoff;		/* map pages for offset from pgoff till
-					 * max_pgoff inclusive */
-	pte_t *pte;			/* pte entry associated with ->pgoff */
 };
 
+/*
+ * Page fault context: passes though page fault handler instead of endless list
+ * of function arguments.
+ */
+struct fault_env {
+	struct vm_area_struct *vma;	/* Target VMA */
+	unsigned long address;		/* Faulting virtual address */
+	unsigned int flags;		/* FAULT_FLAG_xxx flags */
+	pmd_t *pmd;			/* Pointer to pmd entry matching
+					 * the 'address'
+					 */
+	pte_t *pte;			/* Pointer to pte entry matching
+					 * the 'address'. NULL if the page
+					 * table hasn't been allocated.
+					 */
+	spinlock_t *ptl;		/* Page table lock.
+					 * Protects pte page table if 'pte'
+					 * is not NULL, otherwise pmd.
+					 */
+};
+
 /*
@@ -327,7 +344,8 @@ struct vm_operations_struct {
 	int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
 	int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
 						pmd_t *, unsigned int flags);
-	void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf);
+	void (*map_pages)(struct fault_env *fe,
+			pgoff_t start_pgoff, pgoff_t end_pgoff);
 
 	/* notification that a previously read-only page is about to become
 	 * writable, if an error is returned it will cause a SIGBUS */
@@ -600,8 +618,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 	return pte;
 }
 
-void do_set_pte(struct vm_area_struct *vma, unsigned long address,
-		struct page *page, pte_t *pte, bool write, bool anon);
+void do_set_pte(struct fault_env *fe, struct page *page);
 #endif
 
 /*
@@ -2062,7 +2079,8 @@ extern void truncate_inode_pages_final(struct address_space *);
 
 /* generic vm_area_ops exported for stackable file systems */
 extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
-extern void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern void filemap_map_pages(struct fault_env *fe,
+		pgoff_t start_pgoff, pgoff_t end_pgoff);
 extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 
 /* mm/page-writeback.c */
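The ptl convention documented in the new struct above is visible in the converted PMD-level handlers in mm/huge_memory.c further down: while fe->pte is still NULL, fe->ptl guards the pmd entry. Condensed from the huge_pmd_set_accessed() conversion in this patch:

	fe->ptl = pmd_lock(fe->vma->vm_mm, fe->pmd);
	if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
		goto unlock;
	/* ... update the pmd entry under fe->ptl ... */
unlock:
	spin_unlock(fe->ptl);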
include/linux/userfaultfd_k.h
@@ -27,8 +27,7 @@
 #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
 #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
 
-extern int handle_userfault(struct vm_area_struct *vma, unsigned long address,
-			    unsigned int flags, unsigned long reason);
+extern int handle_userfault(struct fault_env *fe, unsigned long reason);
 
 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
 			    unsigned long src_start, unsigned long len);
@@ -56,10 +55,7 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
 #else /* CONFIG_USERFAULTFD */
 
 /* mm helpers */
-static inline int handle_userfault(struct vm_area_struct *vma,
-				   unsigned long address,
-				   unsigned int flags,
-				   unsigned long reason)
+static inline int handle_userfault(struct fault_env *fe, unsigned long reason)
 {
 	return VM_FAULT_SIGBUS;
 }
mm/filemap.c (28 changes)
@@ -2128,22 +2128,27 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 }
 EXPORT_SYMBOL(filemap_fault);
 
-void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
+void filemap_map_pages(struct fault_env *fe,
+		pgoff_t start_pgoff, pgoff_t end_pgoff)
 {
 	struct radix_tree_iter iter;
 	void **slot;
-	struct file *file = vma->vm_file;
+	struct file *file = fe->vma->vm_file;
 	struct address_space *mapping = file->f_mapping;
+	pgoff_t last_pgoff = start_pgoff;
 	loff_t size;
 	struct page *page;
-	unsigned long address = (unsigned long) vmf->virtual_address;
-	unsigned long addr;
-	pte_t *pte;
 
 	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, vmf->pgoff) {
-		if (iter.index > vmf->max_pgoff)
+	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
+			start_pgoff) {
+		if (iter.index > end_pgoff)
 			break;
+		fe->pte += iter.index - last_pgoff;
+		fe->address += (iter.index - last_pgoff) << PAGE_SHIFT;
+		last_pgoff = iter.index;
+		if (!pte_none(*fe->pte))
+			goto next;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
@@ -2179,14 +2184,9 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
 		if (page->index >= size >> PAGE_SHIFT)
 			goto unlock;
 
-		pte = vmf->pte + page->index - vmf->pgoff;
-		if (!pte_none(*pte))
-			goto unlock;
-
 		if (file->f_ra.mmap_miss > 0)
 			file->f_ra.mmap_miss--;
-		addr = address + (page->index - vmf->pgoff) * PAGE_SIZE;
-		do_set_pte(vma, addr, page, pte, false, false);
+		do_set_pte(fe, page);
 		unlock_page(page);
 		goto next;
 unlock:
@@ -2194,7 +2194,7 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
 skip:
 		put_page(page);
 next:
-		if (iter.index == vmf->max_pgoff)
+		if (iter.index == end_pgoff)
 			break;
 	}
 	rcu_read_unlock();
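This is the "reworked a bit" part mentioned in the commit message: instead of recomputing a pte pointer and address per page from vmf->pgoff, filemap_map_pages() now advances fe->pte and fe->address incrementally by the gap since the last handled page-cache index. A small worked example of the arithmetic (illustrative values only, not from the patch):

	/* fe->pte and fe->address currently correspond to last_pgoff = 100;
	 * the radix tree iterator now yields iter.index = 103 */
	fe->pte     += 103 - 100;			/* advance 3 pte slots */
	fe->address += (103 - 100) << PAGE_SHIFT;	/* advance 3 pages */
	last_pgoff   = 103;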
							
								
								
									
										278
									
								
								mm/huge_memory.c
									
									
									
									
									
								
							
							
						
						
									
										278
									
								
								mm/huge_memory.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -821,26 +821,23 @@ void prep_transhuge_page(struct page *page)
 | 
			
		|||
	set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 | 
			
		||||
					struct vm_area_struct *vma,
 | 
			
		||||
					unsigned long address, pmd_t *pmd,
 | 
			
		||||
					struct page *page, gfp_t gfp,
 | 
			
		||||
					unsigned int flags)
 | 
			
		||||
static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
 | 
			
		||||
		gfp_t gfp)
 | 
			
		||||
{
 | 
			
		||||
	struct vm_area_struct *vma = fe->vma;
 | 
			
		||||
	struct mem_cgroup *memcg;
 | 
			
		||||
	pgtable_t pgtable;
 | 
			
		||||
	spinlock_t *ptl;
 | 
			
		||||
	unsigned long haddr = address & HPAGE_PMD_MASK;
 | 
			
		||||
	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
 | 
			
		||||
 | 
			
		||||
	VM_BUG_ON_PAGE(!PageCompound(page), page);
 | 
			
		||||
 | 
			
		||||
	if (mem_cgroup_try_charge(page, mm, gfp, &memcg, true)) {
 | 
			
		||||
	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
 | 
			
		||||
		put_page(page);
 | 
			
		||||
		count_vm_event(THP_FAULT_FALLBACK);
 | 
			
		||||
		return VM_FAULT_FALLBACK;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	pgtable = pte_alloc_one(mm, haddr);
 | 
			
		||||
	pgtable = pte_alloc_one(vma->vm_mm, haddr);
 | 
			
		||||
	if (unlikely(!pgtable)) {
 | 
			
		||||
		mem_cgroup_cancel_charge(page, memcg, true);
 | 
			
		||||
		put_page(page);
 | 
			
		||||
| 
						 | 
				
			
			@ -855,12 +852,12 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 | 
			
		|||
	 */
 | 
			
		||||
	__SetPageUptodate(page);
 | 
			
		||||
 | 
			
		||||
	ptl = pmd_lock(mm, pmd);
 | 
			
		||||
	if (unlikely(!pmd_none(*pmd))) {
 | 
			
		||||
		spin_unlock(ptl);
 | 
			
		||||
	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
 | 
			
		||||
	if (unlikely(!pmd_none(*fe->pmd))) {
 | 
			
		||||
		spin_unlock(fe->ptl);
 | 
			
		||||
		mem_cgroup_cancel_charge(page, memcg, true);
 | 
			
		||||
		put_page(page);
 | 
			
		||||
		pte_free(mm, pgtable);
 | 
			
		||||
		pte_free(vma->vm_mm, pgtable);
 | 
			
		||||
	} else {
 | 
			
		||||
		pmd_t entry;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -868,12 +865,11 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 | 
			
		|||
		if (userfaultfd_missing(vma)) {
 | 
			
		||||
			int ret;
 | 
			
		||||
 | 
			
		||||
			spin_unlock(ptl);
 | 
			
		||||
			spin_unlock(fe->ptl);
 | 
			
		||||
			mem_cgroup_cancel_charge(page, memcg, true);
 | 
			
		||||
			put_page(page);
 | 
			
		||||
			pte_free(mm, pgtable);
 | 
			
		||||
			ret = handle_userfault(vma, address, flags,
 | 
			
		||||
					       VM_UFFD_MISSING);
 | 
			
		||||
			pte_free(vma->vm_mm, pgtable);
 | 
			
		||||
			ret = handle_userfault(fe, VM_UFFD_MISSING);
 | 
			
		||||
			VM_BUG_ON(ret & VM_FAULT_FALLBACK);
 | 
			
		||||
			return ret;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			@ -883,11 +879,11 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 | 
			
		|||
		page_add_new_anon_rmap(page, vma, haddr, true);
 | 
			
		||||
		mem_cgroup_commit_charge(page, memcg, false, true);
 | 
			
		||||
		lru_cache_add_active_or_unevictable(page, vma);
 | 
			
		||||
		pgtable_trans_huge_deposit(mm, pmd, pgtable);
 | 
			
		||||
		set_pmd_at(mm, haddr, pmd, entry);
 | 
			
		||||
		add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
 | 
			
		||||
		atomic_long_inc(&mm->nr_ptes);
 | 
			
		||||
		spin_unlock(ptl);
 | 
			
		||||
		pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, pgtable);
 | 
			
		||||
		set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
 | 
			
		||||
		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
 | 
			
		||||
		atomic_long_inc(&vma->vm_mm->nr_ptes);
 | 
			
		||||
		spin_unlock(fe->ptl);
 | 
			
		||||
		count_vm_event(THP_FAULT_ALLOC);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -937,13 +933,12 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 | 
			
		|||
	return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		||||
			       unsigned long address, pmd_t *pmd,
 | 
			
		||||
			       unsigned int flags)
 | 
			
		||||
int do_huge_pmd_anonymous_page(struct fault_env *fe)
 | 
			
		||||
{
 | 
			
		||||
	struct vm_area_struct *vma = fe->vma;
 | 
			
		||||
	gfp_t gfp;
 | 
			
		||||
	struct page *page;
 | 
			
		||||
	unsigned long haddr = address & HPAGE_PMD_MASK;
 | 
			
		||||
	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
 | 
			
		||||
 | 
			
		||||
	if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
 | 
			
		||||
		return VM_FAULT_FALLBACK;
 | 
			
		||||
| 
						 | 
				
			
			@ -951,42 +946,40 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		|||
		return VM_FAULT_OOM;
 | 
			
		||||
	if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
 | 
			
		||||
		return VM_FAULT_OOM;
 | 
			
		||||
	if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm) &&
 | 
			
		||||
	if (!(fe->flags & FAULT_FLAG_WRITE) &&
 | 
			
		||||
			!mm_forbids_zeropage(vma->vm_mm) &&
 | 
			
		||||
			transparent_hugepage_use_zero_page()) {
 | 
			
		||||
		spinlock_t *ptl;
 | 
			
		||||
		pgtable_t pgtable;
 | 
			
		||||
		struct page *zero_page;
 | 
			
		||||
		bool set;
 | 
			
		||||
		int ret;
 | 
			
		||||
		pgtable = pte_alloc_one(mm, haddr);
 | 
			
		||||
		pgtable = pte_alloc_one(vma->vm_mm, haddr);
 | 
			
		||||
		if (unlikely(!pgtable))
 | 
			
		||||
			return VM_FAULT_OOM;
 | 
			
		||||
		zero_page = get_huge_zero_page();
 | 
			
		||||
		if (unlikely(!zero_page)) {
 | 
			
		||||
			pte_free(mm, pgtable);
 | 
			
		||||
			pte_free(vma->vm_mm, pgtable);
 | 
			
		||||
			count_vm_event(THP_FAULT_FALLBACK);
 | 
			
		||||
			return VM_FAULT_FALLBACK;
 | 
			
		||||
		}
 | 
			
		||||
		ptl = pmd_lock(mm, pmd);
 | 
			
		||||
		fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
 | 
			
		||||
		ret = 0;
 | 
			
		||||
		set = false;
 | 
			
		||||
		if (pmd_none(*pmd)) {
 | 
			
		||||
		if (pmd_none(*fe->pmd)) {
 | 
			
		||||
			if (userfaultfd_missing(vma)) {
 | 
			
		||||
				spin_unlock(ptl);
 | 
			
		||||
				ret = handle_userfault(vma, address, flags,
 | 
			
		||||
						       VM_UFFD_MISSING);
 | 
			
		||||
				spin_unlock(fe->ptl);
 | 
			
		||||
				ret = handle_userfault(fe, VM_UFFD_MISSING);
 | 
			
		||||
				VM_BUG_ON(ret & VM_FAULT_FALLBACK);
 | 
			
		||||
			} else {
 | 
			
		||||
				set_huge_zero_page(pgtable, mm, vma,
 | 
			
		||||
						   haddr, pmd,
 | 
			
		||||
						   zero_page);
 | 
			
		||||
				spin_unlock(ptl);
 | 
			
		||||
				set_huge_zero_page(pgtable, vma->vm_mm, vma,
 | 
			
		||||
						   haddr, fe->pmd, zero_page);
 | 
			
		||||
				spin_unlock(fe->ptl);
 | 
			
		||||
				set = true;
 | 
			
		||||
			}
 | 
			
		||||
		} else
 | 
			
		||||
			spin_unlock(ptl);
 | 
			
		||||
			spin_unlock(fe->ptl);
 | 
			
		||||
		if (!set) {
 | 
			
		||||
			pte_free(mm, pgtable);
 | 
			
		||||
			pte_free(vma->vm_mm, pgtable);
 | 
			
		||||
			put_huge_zero_page();
 | 
			
		||||
		}
 | 
			
		||||
		return ret;
 | 
			
		||||
| 
						 | 
				
			
			@ -998,8 +991,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		|||
		return VM_FAULT_FALLBACK;
 | 
			
		||||
	}
 | 
			
		||||
	prep_transhuge_page(page);
 | 
			
		||||
	return __do_huge_pmd_anonymous_page(mm, vma, address, pmd, page, gfp,
 | 
			
		||||
					    flags);
 | 
			
		||||
	return __do_huge_pmd_anonymous_page(fe, page, gfp);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 | 
			
		||||
| 
						 | 
				
			
			@ -1172,38 +1164,31 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 | 
			
		|||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void huge_pmd_set_accessed(struct mm_struct *mm,
 | 
			
		||||
			   struct vm_area_struct *vma,
 | 
			
		||||
			   unsigned long address,
 | 
			
		||||
			   pmd_t *pmd, pmd_t orig_pmd,
 | 
			
		||||
			   int dirty)
 | 
			
		||||
void huge_pmd_set_accessed(struct fault_env *fe, pmd_t orig_pmd)
 | 
			
		||||
{
 | 
			
		||||
	spinlock_t *ptl;
 | 
			
		||||
	pmd_t entry;
 | 
			
		||||
	unsigned long haddr;
 | 
			
		||||
 | 
			
		||||
	ptl = pmd_lock(mm, pmd);
 | 
			
		||||
	if (unlikely(!pmd_same(*pmd, orig_pmd)))
 | 
			
		||||
	fe->ptl = pmd_lock(fe->vma->vm_mm, fe->pmd);
 | 
			
		||||
	if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
 | 
			
		||||
		goto unlock;
 | 
			
		||||
 | 
			
		||||
	entry = pmd_mkyoung(orig_pmd);
 | 
			
		||||
	haddr = address & HPAGE_PMD_MASK;
 | 
			
		||||
	if (pmdp_set_access_flags(vma, haddr, pmd, entry, dirty))
 | 
			
		||||
		update_mmu_cache_pmd(vma, address, pmd);
 | 
			
		||||
	haddr = fe->address & HPAGE_PMD_MASK;
 | 
			
		||||
	if (pmdp_set_access_flags(fe->vma, haddr, fe->pmd, entry,
 | 
			
		||||
				fe->flags & FAULT_FLAG_WRITE))
 | 
			
		||||
		update_mmu_cache_pmd(fe->vma, fe->address, fe->pmd);
 | 
			
		||||
 | 
			
		||||
unlock:
 | 
			
		||||
	spin_unlock(ptl);
 | 
			
		||||
	spin_unlock(fe->ptl);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 | 
			
		||||
					struct vm_area_struct *vma,
 | 
			
		||||
					unsigned long address,
 | 
			
		||||
					pmd_t *pmd, pmd_t orig_pmd,
 | 
			
		||||
					struct page *page,
 | 
			
		||||
					unsigned long haddr)
 | 
			
		||||
static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
 | 
			
		||||
		struct page *page)
 | 
			
		||||
{
 | 
			
		||||
	struct vm_area_struct *vma = fe->vma;
 | 
			
		||||
	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
 | 
			
		||||
	struct mem_cgroup *memcg;
 | 
			
		||||
	spinlock_t *ptl;
 | 
			
		||||
	pgtable_t pgtable;
 | 
			
		||||
	pmd_t _pmd;
 | 
			
		||||
	int ret = 0, i;
 | 
			
		||||
| 
						 | 
				
			
			@ -1220,11 +1205,11 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 | 
			
		|||
 | 
			
		||||
	for (i = 0; i < HPAGE_PMD_NR; i++) {
 | 
			
		||||
		pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE |
 | 
			
		||||
					       __GFP_OTHER_NODE,
 | 
			
		||||
					       vma, address, page_to_nid(page));
 | 
			
		||||
					       __GFP_OTHER_NODE, vma,
 | 
			
		||||
					       fe->address, page_to_nid(page));
 | 
			
		||||
		if (unlikely(!pages[i] ||
 | 
			
		||||
			     mem_cgroup_try_charge(pages[i], mm, GFP_KERNEL,
 | 
			
		||||
						   &memcg, false))) {
 | 
			
		||||
			     mem_cgroup_try_charge(pages[i], vma->vm_mm,
 | 
			
		||||
				     GFP_KERNEL, &memcg, false))) {
 | 
			
		||||
			if (pages[i])
 | 
			
		||||
				put_page(pages[i]);
 | 
			
		||||
			while (--i >= 0) {
 | 
			
		||||
| 
						 | 
				
			
			@ -1250,41 +1235,41 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 | 
			
		|||
 | 
			
		||||
	mmun_start = haddr;
 | 
			
		||||
	mmun_end   = haddr + HPAGE_PMD_SIZE;
 | 
			
		||||
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 | 
			
		||||
	mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
 | 
			
		||||
 | 
			
		||||
	ptl = pmd_lock(mm, pmd);
 | 
			
		||||
	if (unlikely(!pmd_same(*pmd, orig_pmd)))
 | 
			
		||||
	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
 | 
			
		||||
	if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
 | 
			
		||||
		goto out_free_pages;
 | 
			
		||||
	VM_BUG_ON_PAGE(!PageHead(page), page);
 | 
			
		||||
 | 
			
		||||
	pmdp_huge_clear_flush_notify(vma, haddr, pmd);
 | 
			
		||||
	pmdp_huge_clear_flush_notify(vma, haddr, fe->pmd);
 | 
			
		||||
	/* leave pmd empty until pte is filled */
 | 
			
		||||
 | 
			
		||||
	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
 | 
			
		||||
	pmd_populate(mm, &_pmd, pgtable);
 | 
			
		||||
	pgtable = pgtable_trans_huge_withdraw(vma->vm_mm, fe->pmd);
 | 
			
		||||
	pmd_populate(vma->vm_mm, &_pmd, pgtable);
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
 | 
			
		||||
		pte_t *pte, entry;
 | 
			
		||||
		pte_t entry;
 | 
			
		||||
		entry = mk_pte(pages[i], vma->vm_page_prot);
 | 
			
		||||
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 | 
			
		||||
		memcg = (void *)page_private(pages[i]);
 | 
			
		||||
		set_page_private(pages[i], 0);
 | 
			
		||||
		page_add_new_anon_rmap(pages[i], vma, haddr, false);
 | 
			
		||||
		page_add_new_anon_rmap(pages[i], fe->vma, haddr, false);
 | 
			
		||||
		mem_cgroup_commit_charge(pages[i], memcg, false, false);
 | 
			
		||||
		lru_cache_add_active_or_unevictable(pages[i], vma);
 | 
			
		||||
		pte = pte_offset_map(&_pmd, haddr);
 | 
			
		||||
		VM_BUG_ON(!pte_none(*pte));
 | 
			
		||||
		set_pte_at(mm, haddr, pte, entry);
 | 
			
		||||
		pte_unmap(pte);
 | 
			
		||||
		fe->pte = pte_offset_map(&_pmd, haddr);
 | 
			
		||||
		VM_BUG_ON(!pte_none(*fe->pte));
 | 
			
		||||
		set_pte_at(vma->vm_mm, haddr, fe->pte, entry);
 | 
			
		||||
		pte_unmap(fe->pte);
 | 
			
		||||
	}
 | 
			
		||||
	kfree(pages);
 | 
			
		||||
 | 
			
		||||
	smp_wmb(); /* make pte visible before pmd */
 | 
			
		||||
	pmd_populate(mm, pmd, pgtable);
 | 
			
		||||
	pmd_populate(vma->vm_mm, fe->pmd, pgtable);
 | 
			
		||||
	page_remove_rmap(page, true);
 | 
			
		||||
	spin_unlock(ptl);
 | 
			
		||||
	spin_unlock(fe->ptl);
 | 
			
		||||
 | 
			
		||||
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 | 
			
		||||
	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 | 
			
		||||
 | 
			
		||||
	ret |= VM_FAULT_WRITE;
 | 
			
		||||
	put_page(page);
 | 
			
		||||
| 
						 | 
				
			
			@ -1293,8 +1278,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 | 
			
		|||
	return ret;
 | 
			
		||||
 | 
			
		||||
out_free_pages:
 | 
			
		||||
	spin_unlock(ptl);
 | 
			
		||||
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 | 
			
		||||
	spin_unlock(fe->ptl);
 | 
			
		||||
	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 | 
			
		||||
	for (i = 0; i < HPAGE_PMD_NR; i++) {
 | 
			
		||||
		memcg = (void *)page_private(pages[i]);
 | 
			
		||||
		set_page_private(pages[i], 0);
 | 
			
		||||
| 
						 | 
				
			
			@ -1305,25 +1290,23 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 | 
			
		|||
	goto out;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		||||
			unsigned long address, pmd_t *pmd, pmd_t orig_pmd)
 | 
			
		||||
int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
 | 
			
		||||
{
 | 
			
		||||
	spinlock_t *ptl;
 | 
			
		||||
	int ret = 0;
 | 
			
		||||
	struct vm_area_struct *vma = fe->vma;
 | 
			
		||||
	struct page *page = NULL, *new_page;
 | 
			
		||||
	struct mem_cgroup *memcg;
 | 
			
		||||
	unsigned long haddr;
 | 
			
		||||
	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
 | 
			
		||||
	unsigned long mmun_start;	/* For mmu_notifiers */
 | 
			
		||||
	unsigned long mmun_end;		/* For mmu_notifiers */
 | 
			
		||||
	gfp_t huge_gfp;			/* for allocation and charge */
 | 
			
		||||
	int ret = 0;
 | 
			
		||||
 | 
			
		||||
	ptl = pmd_lockptr(mm, pmd);
 | 
			
		||||
	fe->ptl = pmd_lockptr(vma->vm_mm, fe->pmd);
 | 
			
		||||
	VM_BUG_ON_VMA(!vma->anon_vma, vma);
 | 
			
		||||
	haddr = address & HPAGE_PMD_MASK;
 | 
			
		||||
	if (is_huge_zero_pmd(orig_pmd))
 | 
			
		||||
		goto alloc;
 | 
			
		||||
	spin_lock(ptl);
 | 
			
		||||
	if (unlikely(!pmd_same(*pmd, orig_pmd)))
 | 
			
		||||
	spin_lock(fe->ptl);
 | 
			
		||||
	if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
 | 
			
		||||
		goto out_unlock;
 | 
			
		||||
 | 
			
		||||
	page = pmd_page(orig_pmd);
 | 
			
		||||
| 
						 | 
				
			
			@ -1336,13 +1319,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		|||
		pmd_t entry;
 | 
			
		||||
		entry = pmd_mkyoung(orig_pmd);
 | 
			
		||||
		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 | 
			
		||||
		if (pmdp_set_access_flags(vma, haddr, pmd, entry,  1))
 | 
			
		||||
			update_mmu_cache_pmd(vma, address, pmd);
 | 
			
		||||
		if (pmdp_set_access_flags(vma, haddr, fe->pmd, entry,  1))
 | 
			
		||||
			update_mmu_cache_pmd(vma, fe->address, fe->pmd);
 | 
			
		||||
		ret |= VM_FAULT_WRITE;
 | 
			
		||||
		goto out_unlock;
 | 
			
		||||
	}
 | 
			
		||||
	get_page(page);
 | 
			
		||||
	spin_unlock(ptl);
 | 
			
		||||
	spin_unlock(fe->ptl);
 | 
			
		||||
alloc:
 | 
			
		||||
	if (transparent_hugepage_enabled(vma) &&
 | 
			
		||||
	    !transparent_hugepage_debug_cow()) {
 | 
			
		||||
| 
						 | 
				
			
			@ -1355,13 +1338,12 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		|||
		prep_transhuge_page(new_page);
 | 
			
		||||
	} else {
 | 
			
		||||
		if (!page) {
 | 
			
		||||
			split_huge_pmd(vma, pmd, address);
 | 
			
		||||
			split_huge_pmd(vma, fe->pmd, fe->address);
 | 
			
		||||
			ret |= VM_FAULT_FALLBACK;
 | 
			
		||||
		} else {
 | 
			
		||||
			ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
 | 
			
		||||
					pmd, orig_pmd, page, haddr);
 | 
			
		||||
			ret = do_huge_pmd_wp_page_fallback(fe, orig_pmd, page);
 | 
			
		||||
			if (ret & VM_FAULT_OOM) {
 | 
			
		||||
				split_huge_pmd(vma, pmd, address);
 | 
			
		||||
				split_huge_pmd(vma, fe->pmd, fe->address);
 | 
			
		||||
				ret |= VM_FAULT_FALLBACK;
 | 
			
		||||
			}
 | 
			
		||||
			put_page(page);
 | 
			
		||||
| 
						 | 
				
			
			@ -1370,14 +1352,12 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		|||
		goto out;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (unlikely(mem_cgroup_try_charge(new_page, mm, huge_gfp, &memcg,
 | 
			
		||||
					   true))) {
 | 
			
		||||
	if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
 | 
			
		||||
					huge_gfp, &memcg, true))) {
 | 
			
		||||
		put_page(new_page);
 | 
			
		||||
		if (page) {
 | 
			
		||||
			split_huge_pmd(vma, pmd, address);
 | 
			
		||||
		split_huge_pmd(vma, fe->pmd, fe->address);
 | 
			
		||||
		if (page)
 | 
			
		||||
			put_page(page);
 | 
			
		||||
		} else
 | 
			
		||||
			split_huge_pmd(vma, pmd, address);
 | 
			
		||||
		ret |= VM_FAULT_FALLBACK;
 | 
			
		||||
		count_vm_event(THP_FAULT_FALLBACK);
 | 
			
		||||
		goto out;
 | 
			
		||||
| 
						 | 
				
			
			@ -1393,13 +1373,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		|||
 | 
			
		||||
	mmun_start = haddr;
 | 
			
		||||
	mmun_end   = haddr + HPAGE_PMD_SIZE;
 | 
			
		||||
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 | 
			
		||||
	mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
 | 
			
		||||
 | 
			
		||||
	spin_lock(ptl);
 | 
			
		||||
	spin_lock(fe->ptl);
 | 
			
		||||
	if (page)
 | 
			
		||||
		put_page(page);
 | 
			
		||||
	if (unlikely(!pmd_same(*pmd, orig_pmd))) {
 | 
			
		||||
		spin_unlock(ptl);
 | 
			
		||||
	if (unlikely(!pmd_same(*fe->pmd, orig_pmd))) {
 | 
			
		||||
		spin_unlock(fe->ptl);
 | 
			
		||||
		mem_cgroup_cancel_charge(new_page, memcg, true);
 | 
			
		||||
		put_page(new_page);
 | 
			
		||||
		goto out_mn;
 | 
			
		||||
| 
						 | 
				
			
			@ -1407,14 +1387,14 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		|||
		pmd_t entry;
 | 
			
		||||
		entry = mk_huge_pmd(new_page, vma->vm_page_prot);
 | 
			
		||||
		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 | 
			
		||||
		pmdp_huge_clear_flush_notify(vma, haddr, pmd);
 | 
			
		||||
		pmdp_huge_clear_flush_notify(vma, haddr, fe->pmd);
 | 
			
		||||
		page_add_new_anon_rmap(new_page, vma, haddr, true);
 | 
			
		||||
		mem_cgroup_commit_charge(new_page, memcg, false, true);
 | 
			
		||||
		lru_cache_add_active_or_unevictable(new_page, vma);
 | 
			
		||||
		set_pmd_at(mm, haddr, pmd, entry);
 | 
			
		||||
		update_mmu_cache_pmd(vma, address, pmd);
 | 
			
		||||
		set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
 | 
			
		||||
		update_mmu_cache_pmd(vma, fe->address, fe->pmd);
 | 
			
		||||
		if (!page) {
 | 
			
		||||
			add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
 | 
			
		||||
			add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
 | 
			
		||||
			put_huge_zero_page();
 | 
			
		||||
		} else {
 | 
			
		||||
			VM_BUG_ON_PAGE(!PageHead(page), page);
 | 
			
		||||
| 
						 | 
				
			
			@ -1423,13 +1403,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		|||
		}
 | 
			
		||||
		ret |= VM_FAULT_WRITE;
 | 
			
		||||
	}
 | 
			
		||||
	spin_unlock(ptl);
 | 
			
		||||
	spin_unlock(fe->ptl);
 | 
			
		||||
out_mn:
 | 
			
		||||
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 | 
			
		||||
	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 | 
			
		||||
out:
 | 
			
		||||
	return ret;
 | 
			
		||||
out_unlock:
 | 
			
		||||
	spin_unlock(ptl);
 | 
			
		||||
	spin_unlock(fe->ptl);
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1489,13 +1469,12 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
/* NUMA hinting page fault entry point for trans huge pmds */
 | 
			
		||||
int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		||||
				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 | 
			
		||||
int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
 | 
			
		||||
{
 | 
			
		||||
	spinlock_t *ptl;
 | 
			
		||||
	struct vm_area_struct *vma = fe->vma;
 | 
			
		||||
	struct anon_vma *anon_vma = NULL;
 | 
			
		||||
	struct page *page;
 | 
			
		||||
	unsigned long haddr = addr & HPAGE_PMD_MASK;
 | 
			
		||||
	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
 | 
			
		||||
	int page_nid = -1, this_nid = numa_node_id();
 | 
			
		||||
	int target_nid, last_cpupid = -1;
 | 
			
		||||
	bool page_locked;
 | 
			
		||||
| 
						 | 
				
			
			@ -1506,8 +1485,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		|||
	/* A PROT_NONE fault should not end up here */
 | 
			
		||||
	BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
 | 
			
		||||
 | 
			
		||||
	ptl = pmd_lock(mm, pmdp);
 | 
			
		||||
	if (unlikely(!pmd_same(pmd, *pmdp)))
 | 
			
		||||
	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
 | 
			
		||||
	if (unlikely(!pmd_same(pmd, *fe->pmd)))
 | 
			
		||||
		goto out_unlock;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
| 
						 | 
				
			
			@ -1515,9 +1494,9 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		|||
	 * without disrupting NUMA hinting information. Do not relock and
 | 
			
		||||
	 * check_same as the page may no longer be mapped.
 | 
			
		||||
	 */
 | 
			
		||||
	if (unlikely(pmd_trans_migrating(*pmdp))) {
 | 
			
		||||
		page = pmd_page(*pmdp);
 | 
			
		||||
		spin_unlock(ptl);
 | 
			
		||||
	if (unlikely(pmd_trans_migrating(*fe->pmd))) {
 | 
			
		||||
		page = pmd_page(*fe->pmd);
 | 
			
		||||
		spin_unlock(fe->ptl);
 | 
			
		||||
		wait_on_page_locked(page);
 | 
			
		||||
		goto out;
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -1550,7 +1529,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		|||
 | 
			
		||||
	/* Migration could have started since the pmd_trans_migrating check */
 | 
			
		||||
	if (!page_locked) {
 | 
			
		||||
		spin_unlock(ptl);
 | 
			
		||||
		spin_unlock(fe->ptl);
 | 
			
		||||
		wait_on_page_locked(page);
 | 
			
		||||
		page_nid = -1;
 | 
			
		||||
		goto out;
 | 
			
		||||
| 
						 | 
				
			
			@ -1561,12 +1540,12 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		|||
	 * to serialises splits
 | 
			
		||||
	 */
 | 
			
		||||
	get_page(page);
 | 
			
		||||
	spin_unlock(ptl);
 | 
			
		||||
	spin_unlock(fe->ptl);
 | 
			
		||||
	anon_vma = page_lock_anon_vma_read(page);
 | 
			
		||||
 | 
			
		||||
	/* Confirm the PMD did not change while page_table_lock was released */
 | 
			
		||||
	spin_lock(ptl);
 | 
			
		||||
	if (unlikely(!pmd_same(pmd, *pmdp))) {
 | 
			
		||||
	spin_lock(fe->ptl);
 | 
			
		||||
	if (unlikely(!pmd_same(pmd, *fe->pmd))) {
 | 
			
		||||
		unlock_page(page);
 | 
			
		||||
		put_page(page);
 | 
			
		||||
		page_nid = -1;
 | 
			
		||||
| 
						 | 
				
			
			@ -1584,9 +1563,9 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		|||
	 * Migrate the THP to the requested node, returns with page unlocked
 | 
			
		||||
	 * and access rights restored.
 | 
			
		||||
	 */
 | 
			
		||||
	spin_unlock(ptl);
 | 
			
		||||
	migrated = migrate_misplaced_transhuge_page(mm, vma,
 | 
			
		||||
				pmdp, pmd, addr, page, target_nid);
 | 
			
		||||
	spin_unlock(fe->ptl);
 | 
			
		||||
	migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
 | 
			
		||||
				fe->pmd, pmd, fe->address, page, target_nid);
 | 
			
		||||
	if (migrated) {
 | 
			
		||||
		flags |= TNF_MIGRATED;
 | 
			
		||||
		page_nid = target_nid;
 | 
			
		||||
| 
						 | 
				
			
			@ -1601,18 +1580,18 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 | 
			
		|||
	pmd = pmd_mkyoung(pmd);
 | 
			
		||||
	if (was_writable)
 | 
			
		||||
		pmd = pmd_mkwrite(pmd);
 | 
			
		||||
	set_pmd_at(mm, haddr, pmdp, pmd);
 | 
			
		||||
	update_mmu_cache_pmd(vma, addr, pmdp);
 | 
			
		||||
	set_pmd_at(vma->vm_mm, haddr, fe->pmd, pmd);
 | 
			
		||||
	update_mmu_cache_pmd(vma, fe->address, fe->pmd);
 | 
			
		||||
	unlock_page(page);
 | 
			
		||||
out_unlock:
 | 
			
		||||
	spin_unlock(ptl);
 | 
			
		||||
	spin_unlock(fe->ptl);
 | 
			
		||||
 | 
			
		||||
out:
 | 
			
		||||
	if (anon_vma)
 | 
			
		||||
		page_unlock_anon_vma_read(anon_vma);
 | 
			
		||||
 | 
			
		||||
	if (page_nid != -1)
 | 
			
		||||
		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);
 | 
			
		||||
		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, fe->flags);
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -2413,20 +2392,23 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 | 
			
		|||
					struct vm_area_struct *vma,
 | 
			
		||||
					unsigned long address, pmd_t *pmd)
 | 
			
		||||
{
 | 
			
		||||
	unsigned long _address;
 | 
			
		||||
	pte_t *pte, pteval;
 | 
			
		||||
	pte_t pteval;
 | 
			
		||||
	int swapped_in = 0, ret = 0;
 | 
			
		||||
	struct fault_env fe = {
 | 
			
		||||
		.vma = vma,
 | 
			
		||||
		.address = address,
 | 
			
		||||
		.flags = FAULT_FLAG_ALLOW_RETRY,
 | 
			
		||||
		.pmd = pmd,
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	pte = pte_offset_map(pmd, address);
 | 
			
		||||
	for (_address = address; _address < address + HPAGE_PMD_NR*PAGE_SIZE;
 | 
			
		||||
	     pte++, _address += PAGE_SIZE) {
 | 
			
		||||
		pteval = *pte;
 | 
			
		||||
	fe.pte = pte_offset_map(pmd, address);
 | 
			
		||||
	for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE;
 | 
			
		||||
			fe.pte++, fe.address += PAGE_SIZE) {
 | 
			
		||||
		pteval = *fe.pte;
 | 
			
		||||
		if (!is_swap_pte(pteval))
 | 
			
		||||
			continue;
 | 
			
		||||
		swapped_in++;
 | 
			
		||||
		ret = do_swap_page(mm, vma, _address, pte, pmd,
 | 
			
		||||
				   FAULT_FLAG_ALLOW_RETRY,
 | 
			
		||||
				   pteval);
 | 
			
		||||
		ret = do_swap_page(&fe, pteval);
 | 
			
		||||
		/* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
 | 
			
		||||
		if (ret & VM_FAULT_RETRY) {
 | 
			
		||||
			down_read(&mm->mmap_sem);
 | 
			
		||||
| 
						 | 
				
			
			@ -2442,10 +2424,10 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 | 
			
		|||
			return false;
 | 
			
		||||
		}
 | 
			
		||||
		/* pte is unmapped now, we need to map it */
 | 
			
		||||
		pte = pte_offset_map(pmd, _address);
 | 
			
		||||
		fe.pte = pte_offset_map(pmd, fe.address);
 | 
			
		||||
	}
 | 
			
		||||
	pte--;
 | 
			
		||||
	pte_unmap(pte);
 | 
			
		||||
	fe.pte--;
 | 
			
		||||
	pte_unmap(fe.pte);
 | 
			
		||||
	trace_mm_collapse_huge_page_swapin(mm, swapped_in, 1);
 | 
			
		||||
	return true;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
mm/internal.h
@@ -36,9 +36,7 @@
 /* Do not use these with a slab allocator */
 #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
 
-extern int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long address, pte_t *page_table, pmd_t *pmd,
-			unsigned int flags, pte_t orig_pte);
+int do_swap_page(struct fault_env *fe, pte_t orig_pte);
 
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);

mm/memory.c (582 changes)
File diff suppressed in this view because it is too large.
mm/nommu.c
@@ -1809,7 +1809,8 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 }
 EXPORT_SYMBOL(filemap_fault);
 
-void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
+void filemap_map_pages(struct fault_env *fe,
+		pgoff_t start_pgoff, pgoff_t end_pgoff)
 {
 	BUG();
 }