mm: introduce fault_env

The idea is borrowed from Peter's patch from the patchset on speculative page
faults [1]: instead of passing around the endless list of function arguments,
replace the lot with a single structure so we can change context without
endless function signature changes.

The changes are mostly mechanical, with the exception of the faultaround
code: filemap_map_pages() got reworked a bit.

This patch is preparation for the next one.

[1] http://lkml.kernel.org/r/20141020222841.302891540@infradead.org

Link: http://lkml.kernel.org/r/1466021202-61880-9-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
commit bae473a423 (parent dcddffd41d)
10 changed files with 474 additions and 515 deletions
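Editor's note, a minimal standalone sketch (hypothetical types and names, not kernel code and not part of this patch) of the pattern the commit message describes: the fault context that used to be spread across function arguments travels as one object, so growing the context no longer forces signature changes on every helper.

#include <stdio.h>

/* Hypothetical stand-in for the kernel's fault_env; field names mirror it. */
struct fault_env {
	void *vma;		/* target VMA (placeholder type) */
	unsigned long address;	/* faulting virtual address */
	unsigned int flags;	/* FAULT_FLAG_xxx-style flags */
};

/* Before: every helper repeats the whole context in its signature. */
static int handle_old(void *vma, unsigned long address, unsigned int flags)
{
	printf("old style: addr=%#lx flags=%#x\n", address, flags);
	return 0;
}

/* After: one pointer carries the context; a new field (e.g. a page table
 * lock pointer) can be added without touching any prototypes. */
static int handle_new(struct fault_env *fe)
{
	printf("new style: addr=%#lx flags=%#x\n", fe->address, fe->flags);
	return 0;
}

int main(void)
{
	struct fault_env fe = { .vma = NULL, .address = 0x1000, .flags = 0x1 };

	handle_old(fe.vma, fe.address, fe.flags);
	handle_new(&fe);
	return 0;
}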
@@ -548,13 +548,13 @@ subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
 locked. The VM will unlock the page.
 
 	->map_pages() is called when VM asks to map easy accessible pages.
-Filesystem should find and map pages associated with offsets from "pgoff"
-till "max_pgoff". ->map_pages() is called with page table locked and must
+Filesystem should find and map pages associated with offsets from "start_pgoff"
+till "end_pgoff". ->map_pages() is called with page table locked and must
 not block.  If it's not possible to reach a page without blocking,
 filesystem should skip it. Filesystem should use do_set_pte() to setup
-page table entry. Pointer to entry associated with offset "pgoff" is
-passed in "pte" field in vm_fault structure. Pointers to entries for other
-offsets should be calculated relative to "pte".
+page table entry. Pointer to entry associated with the page is passed in
+"pte" field in fault_env structure. Pointers to entries for other offsets
+should be calculated relative to "pte".
 
 	->page_mkwrite() is called when a previously read-only pte is
 about to become writeable. The filesystem again must ensure that there are
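Editor's note, a hedged sketch rather than patch content: a filesystem that wants the generic faultaround behaviour keeps pointing ->map_pages at filemap_map_pages; only the callback's prototype changes with this patch. The example_file_vm_ops name below is hypothetical, modelled on how file-backed mappings typically wire these operations.

/* Hypothetical wiring after this patch; not taken from the diff. */
static const struct vm_operations_struct example_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,	/* now (fe, start_pgoff, end_pgoff) */
	.page_mkwrite	= filemap_page_mkwrite,
};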
@@ -257,10 +257,9 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
  * fatal_signal_pending()s, and the mmap_sem must be released before
  * returning it.
  */
-int handle_userfault(struct vm_area_struct *vma, unsigned long address,
-		     unsigned int flags, unsigned long reason)
+int handle_userfault(struct fault_env *fe, unsigned long reason)
 {
-	struct mm_struct *mm = vma->vm_mm;
+	struct mm_struct *mm = fe->vma->vm_mm;
 	struct userfaultfd_ctx *ctx;
 	struct userfaultfd_wait_queue uwq;
 	int ret;
@@ -269,7 +268,7 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 	BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
 	ret = VM_FAULT_SIGBUS;
-	ctx = vma->vm_userfaultfd_ctx.ctx;
+	ctx = fe->vma->vm_userfaultfd_ctx.ctx;
 	if (!ctx)
 		goto out;
 
@@ -302,17 +301,17 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 	 * without first stopping userland access to the memory. For
 	 * VM_UFFD_MISSING userfaults this is enough for now.
 	 */
-	if (unlikely(!(flags & FAULT_FLAG_ALLOW_RETRY))) {
+	if (unlikely(!(fe->flags & FAULT_FLAG_ALLOW_RETRY))) {
 		/*
 		 * Validate the invariant that nowait must allow retry
 		 * to be sure not to return SIGBUS erroneously on
 		 * nowait invocations.
 		 */
-		BUG_ON(flags & FAULT_FLAG_RETRY_NOWAIT);
+		BUG_ON(fe->flags & FAULT_FLAG_RETRY_NOWAIT);
 #ifdef CONFIG_DEBUG_VM
 		if (printk_ratelimit()) {
 			printk(KERN_WARNING
-			       "FAULT_FLAG_ALLOW_RETRY missing %x\n", flags);
+			       "FAULT_FLAG_ALLOW_RETRY missing %x\n", fe->flags);
 			dump_stack();
 		}
 #endif
@@ -324,7 +323,7 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 	 * and wait.
 	 */
 	ret = VM_FAULT_RETRY;
-	if (flags & FAULT_FLAG_RETRY_NOWAIT)
+	if (fe->flags & FAULT_FLAG_RETRY_NOWAIT)
 		goto out;
 
 	/* take the reference before dropping the mmap_sem */
@@ -332,10 +331,11 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 
 	init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
 	uwq.wq.private = current;
-	uwq.msg = userfault_msg(address, flags, reason);
+	uwq.msg = userfault_msg(fe->address, fe->flags, reason);
 	uwq.ctx = ctx;
 
-	return_to_userland = (flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
+	return_to_userland =
+		(fe->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
 		(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
 
 	spin_lock(&ctx->fault_pending_wqh.lock);
@@ -353,7 +353,7 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
 			  TASK_KILLABLE);
 	spin_unlock(&ctx->fault_pending_wqh.lock);
 
-	must_wait = userfaultfd_must_wait(ctx, address, flags, reason);
+	must_wait = userfaultfd_must_wait(ctx, fe->address, fe->flags, reason);
 	up_read(&mm->mmap_sem);
 
 	if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&
@@ -1,20 +1,12 @@
 #ifndef _LINUX_HUGE_MM_H
 #define _LINUX_HUGE_MM_H
 
-extern int do_huge_pmd_anonymous_page(struct mm_struct *mm,
-				      struct vm_area_struct *vma,
-				      unsigned long address, pmd_t *pmd,
-				      unsigned int flags);
+extern int do_huge_pmd_anonymous_page(struct fault_env *fe);
 extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
 			 struct vm_area_struct *vma);
-extern void huge_pmd_set_accessed(struct mm_struct *mm,
-				  struct vm_area_struct *vma,
-				  unsigned long address, pmd_t *pmd,
-				  pmd_t orig_pmd, int dirty);
-extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			       unsigned long address, pmd_t *pmd,
-			       pmd_t orig_pmd);
+extern void huge_pmd_set_accessed(struct fault_env *fe, pmd_t orig_pmd);
+extern int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd);
 extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 					  unsigned long addr,
 					  pmd_t *pmd,
@@ -134,8 +126,7 @@ static inline int hpage_nr_pages(struct page *page)
 	return 1;
 }
 
-extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-				unsigned long addr, pmd_t pmd, pmd_t *pmdp);
+extern int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t orig_pmd);
 
 extern struct page *huge_zero_page;
 
@@ -196,8 +187,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
 	return NULL;
 }
 
-static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-					unsigned long addr, pmd_t pmd, pmd_t *pmdp)
+static inline int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t orig_pmd)
 {
 	return 0;
 }
@@ -309,10 +309,27 @@ struct vm_fault {
 					 * VM_FAULT_DAX_LOCKED and fill in
 					 * entry here.
 					 */
-	/* for ->map_pages() only */
-	pgoff_t max_pgoff;		/* map pages for offset from pgoff till
-					 * max_pgoff inclusive */
-	pte_t *pte;			/* pte entry associated with ->pgoff */
+};
+
+/*
+ * Page fault context: passes though page fault handler instead of endless list
+ * of function arguments.
+ */
+struct fault_env {
+	struct vm_area_struct *vma;	/* Target VMA */
+	unsigned long address;		/* Faulting virtual address */
+	unsigned int flags;		/* FAULT_FLAG_xxx flags */
+	pmd_t *pmd;			/* Pointer to pmd entry matching
+					 * the 'address'
+					 */
+	pte_t *pte;			/* Pointer to pte entry matching
+					 * the 'address'. NULL if the page
+					 * table hasn't been allocated.
+					 */
+	spinlock_t *ptl;		/* Page table lock.
+					 * Protects pte page table if 'pte'
+					 * is not NULL, otherwise pmd.
+					 */
 };
 
 /*
@@ -327,7 +344,8 @@ struct vm_operations_struct {
 	int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
 	int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
 						pmd_t *, unsigned int flags);
-	void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf);
+	void (*map_pages)(struct fault_env *fe,
+			pgoff_t start_pgoff, pgoff_t end_pgoff);
 
 	/* notification that a previously read-only page is about to become
 	 * writable, if an error is returned it will cause a SIGBUS */
@@ -600,8 +618,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 	return pte;
 }
 
-void do_set_pte(struct vm_area_struct *vma, unsigned long address,
-		struct page *page, pte_t *pte, bool write, bool anon);
+void do_set_pte(struct fault_env *fe, struct page *page);
 #endif
 
 /*
@@ -2062,7 +2079,8 @@ extern void truncate_inode_pages_final(struct address_space *);
 
 /* generic vm_area_ops exported for stackable file systems */
 extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
-extern void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern void filemap_map_pages(struct fault_env *fe,
+		pgoff_t start_pgoff, pgoff_t end_pgoff);
 extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 
 /* mm/page-writeback.c */
@@ -27,8 +27,7 @@
 #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
 #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
 
-extern int handle_userfault(struct vm_area_struct *vma, unsigned long address,
-			    unsigned int flags, unsigned long reason);
+extern int handle_userfault(struct fault_env *fe, unsigned long reason);
 
 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
 			    unsigned long src_start, unsigned long len);
@@ -56,10 +55,7 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
 #else /* CONFIG_USERFAULTFD */
 
 /* mm helpers */
-static inline int handle_userfault(struct vm_area_struct *vma,
-				   unsigned long address,
-				   unsigned int flags,
-				   unsigned long reason)
+static inline int handle_userfault(struct fault_env *fe, unsigned long reason)
 {
 	return VM_FAULT_SIGBUS;
 }
mm/filemap.c | 28
@@ -2128,22 +2128,27 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 }
 EXPORT_SYMBOL(filemap_fault);
 
-void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
+void filemap_map_pages(struct fault_env *fe,
+		pgoff_t start_pgoff, pgoff_t end_pgoff)
 {
 	struct radix_tree_iter iter;
 	void **slot;
-	struct file *file = vma->vm_file;
+	struct file *file = fe->vma->vm_file;
 	struct address_space *mapping = file->f_mapping;
+	pgoff_t last_pgoff = start_pgoff;
 	loff_t size;
 	struct page *page;
-	unsigned long address = (unsigned long) vmf->virtual_address;
-	unsigned long addr;
-	pte_t *pte;
 
 	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, vmf->pgoff) {
-		if (iter.index > vmf->max_pgoff)
+	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
+			start_pgoff) {
+		if (iter.index > end_pgoff)
 			break;
+		fe->pte += iter.index - last_pgoff;
+		fe->address += (iter.index - last_pgoff) << PAGE_SHIFT;
+		last_pgoff = iter.index;
+		if (!pte_none(*fe->pte))
+			goto next;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
@@ -2179,14 +2184,9 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
 		if (page->index >= size >> PAGE_SHIFT)
 			goto unlock;
 
-		pte = vmf->pte + page->index - vmf->pgoff;
-		if (!pte_none(*pte))
-			goto unlock;
-
 		if (file->f_ra.mmap_miss > 0)
 			file->f_ra.mmap_miss--;
-		addr = address + (page->index - vmf->pgoff) * PAGE_SIZE;
-		do_set_pte(vma, addr, page, pte, false, false);
+		do_set_pte(fe, page);
 		unlock_page(page);
 		goto next;
 unlock:
@@ -2194,7 +2194,7 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
 skip:
 		put_page(page);
 next:
-		if (iter.index == vmf->max_pgoff)
+		if (iter.index == end_pgoff)
 			break;
 	}
 	rcu_read_unlock();
mm/huge_memory.c | 278
@@ -821,26 +821,23 @@ void prep_transhuge_page(struct page *page)
 	set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
 }
 
-static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
-					struct vm_area_struct *vma,
-					unsigned long address, pmd_t *pmd,
-					struct page *page, gfp_t gfp,
-					unsigned int flags)
+static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
+		gfp_t gfp)
 {
+	struct vm_area_struct *vma = fe->vma;
 	struct mem_cgroup *memcg;
 	pgtable_t pgtable;
-	spinlock_t *ptl;
-	unsigned long haddr = address & HPAGE_PMD_MASK;
+	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-	if (mem_cgroup_try_charge(page, mm, gfp, &memcg, true)) {
+	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
 	}
 
-	pgtable = pte_alloc_one(mm, haddr);
+	pgtable = pte_alloc_one(vma->vm_mm, haddr);
 	if (unlikely(!pgtable)) {
 		mem_cgroup_cancel_charge(page, memcg, true);
 		put_page(page);
@@ -855,12 +852,12 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 	 */
 	__SetPageUptodate(page);
 
-	ptl = pmd_lock(mm, pmd);
-	if (unlikely(!pmd_none(*pmd))) {
-		spin_unlock(ptl);
+	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
+	if (unlikely(!pmd_none(*fe->pmd))) {
+		spin_unlock(fe->ptl);
 		mem_cgroup_cancel_charge(page, memcg, true);
 		put_page(page);
-		pte_free(mm, pgtable);
+		pte_free(vma->vm_mm, pgtable);
 	} else {
 		pmd_t entry;
 
@@ -868,12 +865,11 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 		if (userfaultfd_missing(vma)) {
 			int ret;
 
-			spin_unlock(ptl);
+			spin_unlock(fe->ptl);
 			mem_cgroup_cancel_charge(page, memcg, true);
 			put_page(page);
-			pte_free(mm, pgtable);
-			ret = handle_userfault(vma, address, flags,
-					       VM_UFFD_MISSING);
+			pte_free(vma->vm_mm, pgtable);
+			ret = handle_userfault(fe, VM_UFFD_MISSING);
 			VM_BUG_ON(ret & VM_FAULT_FALLBACK);
 			return ret;
 		}
@@ -883,11 +879,11 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 		page_add_new_anon_rmap(page, vma, haddr, true);
 		mem_cgroup_commit_charge(page, memcg, false, true);
 		lru_cache_add_active_or_unevictable(page, vma);
-		pgtable_trans_huge_deposit(mm, pmd, pgtable);
-		set_pmd_at(mm, haddr, pmd, entry);
-		add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
-		atomic_long_inc(&mm->nr_ptes);
-		spin_unlock(ptl);
+		pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, pgtable);
+		set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
+		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+		atomic_long_inc(&vma->vm_mm->nr_ptes);
+		spin_unlock(fe->ptl);
 		count_vm_event(THP_FAULT_ALLOC);
 	}
 
@@ -937,13 +933,12 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 	return true;
 }
 
-int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			       unsigned long address, pmd_t *pmd,
-			       unsigned int flags)
+int do_huge_pmd_anonymous_page(struct fault_env *fe)
 {
+	struct vm_area_struct *vma = fe->vma;
 	gfp_t gfp;
 	struct page *page;
-	unsigned long haddr = address & HPAGE_PMD_MASK;
+	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
 
 	if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
 		return VM_FAULT_FALLBACK;
@@ -951,42 +946,40 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return VM_FAULT_OOM;
 	if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
 		return VM_FAULT_OOM;
-	if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm) &&
+	if (!(fe->flags & FAULT_FLAG_WRITE) &&
+			!mm_forbids_zeropage(vma->vm_mm) &&
 			transparent_hugepage_use_zero_page()) {
-		spinlock_t *ptl;
 		pgtable_t pgtable;
 		struct page *zero_page;
 		bool set;
 		int ret;
-		pgtable = pte_alloc_one(mm, haddr);
+		pgtable = pte_alloc_one(vma->vm_mm, haddr);
 		if (unlikely(!pgtable))
 			return VM_FAULT_OOM;
 		zero_page = get_huge_zero_page();
 		if (unlikely(!zero_page)) {
-			pte_free(mm, pgtable);
+			pte_free(vma->vm_mm, pgtable);
 			count_vm_event(THP_FAULT_FALLBACK);
 			return VM_FAULT_FALLBACK;
 		}
-		ptl = pmd_lock(mm, pmd);
+		fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
 		ret = 0;
 		set = false;
-		if (pmd_none(*pmd)) {
+		if (pmd_none(*fe->pmd)) {
 			if (userfaultfd_missing(vma)) {
-				spin_unlock(ptl);
-				ret = handle_userfault(vma, address, flags,
-						       VM_UFFD_MISSING);
+				spin_unlock(fe->ptl);
+				ret = handle_userfault(fe, VM_UFFD_MISSING);
 				VM_BUG_ON(ret & VM_FAULT_FALLBACK);
 			} else {
-				set_huge_zero_page(pgtable, mm, vma,
-						   haddr, pmd,
-						   zero_page);
-				spin_unlock(ptl);
+				set_huge_zero_page(pgtable, vma->vm_mm, vma,
+						   haddr, fe->pmd, zero_page);
+				spin_unlock(fe->ptl);
 				set = true;
 			}
 		} else
-			spin_unlock(ptl);
+			spin_unlock(fe->ptl);
 		if (!set) {
-			pte_free(mm, pgtable);
+			pte_free(vma->vm_mm, pgtable);
 			put_huge_zero_page();
 		}
 		return ret;
@@ -998,8 +991,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return VM_FAULT_FALLBACK;
 	}
 	prep_transhuge_page(page);
-	return __do_huge_pmd_anonymous_page(mm, vma, address, pmd, page, gfp,
-					    flags);
+	return __do_huge_pmd_anonymous_page(fe, page, gfp);
 }
 
 static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
@@ -1172,38 +1164,31 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	return ret;
 }
 
-void huge_pmd_set_accessed(struct mm_struct *mm,
-			   struct vm_area_struct *vma,
-			   unsigned long address,
-			   pmd_t *pmd, pmd_t orig_pmd,
-			   int dirty)
+void huge_pmd_set_accessed(struct fault_env *fe, pmd_t orig_pmd)
 {
-	spinlock_t *ptl;
 	pmd_t entry;
 	unsigned long haddr;
 
-	ptl = pmd_lock(mm, pmd);
-	if (unlikely(!pmd_same(*pmd, orig_pmd)))
+	fe->ptl = pmd_lock(fe->vma->vm_mm, fe->pmd);
+	if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
 		goto unlock;
 
 	entry = pmd_mkyoung(orig_pmd);
-	haddr = address & HPAGE_PMD_MASK;
-	if (pmdp_set_access_flags(vma, haddr, pmd, entry, dirty))
-		update_mmu_cache_pmd(vma, address, pmd);
+	haddr = fe->address & HPAGE_PMD_MASK;
+	if (pmdp_set_access_flags(fe->vma, haddr, fe->pmd, entry,
+				fe->flags & FAULT_FLAG_WRITE))
+		update_mmu_cache_pmd(fe->vma, fe->address, fe->pmd);
 
 unlock:
-	spin_unlock(ptl);
+	spin_unlock(fe->ptl);
 }
 
-static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
-					struct vm_area_struct *vma,
-					unsigned long address,
-					pmd_t *pmd, pmd_t orig_pmd,
-					struct page *page,
-					unsigned long haddr)
+static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,
+		struct page *page)
 {
+	struct vm_area_struct *vma = fe->vma;
+	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
 	struct mem_cgroup *memcg;
-	spinlock_t *ptl;
 	pgtable_t pgtable;
 	pmd_t _pmd;
 	int ret = 0, i;
@@ -1220,11 +1205,11 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
 		pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE |
-					       __GFP_OTHER_NODE,
-					       vma, address, page_to_nid(page));
+					       __GFP_OTHER_NODE, vma,
+					       fe->address, page_to_nid(page));
 		if (unlikely(!pages[i] ||
-			     mem_cgroup_try_charge(pages[i], mm, GFP_KERNEL,
-						   &memcg, false))) {
+			     mem_cgroup_try_charge(pages[i], vma->vm_mm,
+				     GFP_KERNEL, &memcg, false))) {
 			if (pages[i])
 				put_page(pages[i]);
 			while (--i >= 0) {
@@ -1250,41 +1235,41 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 
 	mmun_start = haddr;
 	mmun_end   = haddr + HPAGE_PMD_SIZE;
-	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+	mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
 
-	ptl = pmd_lock(mm, pmd);
-	if (unlikely(!pmd_same(*pmd, orig_pmd)))
+	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
+	if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
 		goto out_free_pages;
 	VM_BUG_ON_PAGE(!PageHead(page), page);
 
-	pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+	pmdp_huge_clear_flush_notify(vma, haddr, fe->pmd);
 	/* leave pmd empty until pte is filled */
 
-	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
-	pmd_populate(mm, &_pmd, pgtable);
+	pgtable = pgtable_trans_huge_withdraw(vma->vm_mm, fe->pmd);
+	pmd_populate(vma->vm_mm, &_pmd, pgtable);
 
 	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
-		pte_t *pte, entry;
+		pte_t entry;
 		entry = mk_pte(pages[i], vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		memcg = (void *)page_private(pages[i]);
 		set_page_private(pages[i], 0);
-		page_add_new_anon_rmap(pages[i], vma, haddr, false);
+		page_add_new_anon_rmap(pages[i], fe->vma, haddr, false);
 		mem_cgroup_commit_charge(pages[i], memcg, false, false);
 		lru_cache_add_active_or_unevictable(pages[i], vma);
-		pte = pte_offset_map(&_pmd, haddr);
-		VM_BUG_ON(!pte_none(*pte));
-		set_pte_at(mm, haddr, pte, entry);
-		pte_unmap(pte);
+		fe->pte = pte_offset_map(&_pmd, haddr);
+		VM_BUG_ON(!pte_none(*fe->pte));
+		set_pte_at(vma->vm_mm, haddr, fe->pte, entry);
+		pte_unmap(fe->pte);
 	}
 	kfree(pages);
 
 	smp_wmb(); /* make pte visible before pmd */
-	pmd_populate(mm, pmd, pgtable);
+	pmd_populate(vma->vm_mm, fe->pmd, pgtable);
 	page_remove_rmap(page, true);
-	spin_unlock(ptl);
+	spin_unlock(fe->ptl);
 
-	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 
 	ret |= VM_FAULT_WRITE;
 	put_page(page);
@@ -1293,8 +1278,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 	return ret;
 
 out_free_pages:
-	spin_unlock(ptl);
-	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+	spin_unlock(fe->ptl);
+	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
 		memcg = (void *)page_private(pages[i]);
 		set_page_private(pages[i], 0);
@@ -1305,25 +1290,23 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 	goto out;
 }
 
-int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long address, pmd_t *pmd, pmd_t orig_pmd)
+int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
 {
-	spinlock_t *ptl;
-	int ret = 0;
+	struct vm_area_struct *vma = fe->vma;
 	struct page *page = NULL, *new_page;
 	struct mem_cgroup *memcg;
-	unsigned long haddr;
+	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 	gfp_t huge_gfp;			/* for allocation and charge */
+	int ret = 0;
 
-	ptl = pmd_lockptr(mm, pmd);
+	fe->ptl = pmd_lockptr(vma->vm_mm, fe->pmd);
 	VM_BUG_ON_VMA(!vma->anon_vma, vma);
-	haddr = address & HPAGE_PMD_MASK;
 	if (is_huge_zero_pmd(orig_pmd))
 		goto alloc;
-	spin_lock(ptl);
-	if (unlikely(!pmd_same(*pmd, orig_pmd)))
+	spin_lock(fe->ptl);
+	if (unlikely(!pmd_same(*fe->pmd, orig_pmd)))
 		goto out_unlock;
 
 	page = pmd_page(orig_pmd);
@@ -1336,13 +1319,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		pmd_t entry;
 		entry = pmd_mkyoung(orig_pmd);
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-		if (pmdp_set_access_flags(vma, haddr, pmd, entry,  1))
-			update_mmu_cache_pmd(vma, address, pmd);
+		if (pmdp_set_access_flags(vma, haddr, fe->pmd, entry,  1))
+			update_mmu_cache_pmd(vma, fe->address, fe->pmd);
 		ret |= VM_FAULT_WRITE;
 		goto out_unlock;
 	}
 	get_page(page);
-	spin_unlock(ptl);
+	spin_unlock(fe->ptl);
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
@@ -1355,13 +1338,12 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		prep_transhuge_page(new_page);
 	} else {
 		if (!page) {
-			split_huge_pmd(vma, pmd, address);
+			split_huge_pmd(vma, fe->pmd, fe->address);
 			ret |= VM_FAULT_FALLBACK;
 		} else {
-			ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
-					pmd, orig_pmd, page, haddr);
+			ret = do_huge_pmd_wp_page_fallback(fe, orig_pmd, page);
 			if (ret & VM_FAULT_OOM) {
-				split_huge_pmd(vma, pmd, address);
+				split_huge_pmd(vma, fe->pmd, fe->address);
 				ret |= VM_FAULT_FALLBACK;
 			}
 			put_page(page);
@@ -1370,14 +1352,12 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, mm, huge_gfp, &memcg,
-					   true))) {
+	if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
+					huge_gfp, &memcg, true))) {
 		put_page(new_page);
-		if (page) {
-			split_huge_pmd(vma, pmd, address);
+		split_huge_pmd(vma, fe->pmd, fe->address);
+		if (page)
 			put_page(page);
-		} else
-			split_huge_pmd(vma, pmd, address);
 		ret |= VM_FAULT_FALLBACK;
 		count_vm_event(THP_FAULT_FALLBACK);
 		goto out;
@@ -1393,13 +1373,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	mmun_start = haddr;
 	mmun_end   = haddr + HPAGE_PMD_SIZE;
-	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+	mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
 
-	spin_lock(ptl);
+	spin_lock(fe->ptl);
 	if (page)
 		put_page(page);
-	if (unlikely(!pmd_same(*pmd, orig_pmd))) {
-		spin_unlock(ptl);
+	if (unlikely(!pmd_same(*fe->pmd, orig_pmd))) {
+		spin_unlock(fe->ptl);
 		mem_cgroup_cancel_charge(new_page, memcg, true);
 		put_page(new_page);
 		goto out_mn;
@@ -1407,14 +1387,14 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		pmd_t entry;
 		entry = mk_huge_pmd(new_page, vma->vm_page_prot);
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-		pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+		pmdp_huge_clear_flush_notify(vma, haddr, fe->pmd);
 		page_add_new_anon_rmap(new_page, vma, haddr, true);
 		mem_cgroup_commit_charge(new_page, memcg, false, true);
 		lru_cache_add_active_or_unevictable(new_page, vma);
-		set_pmd_at(mm, haddr, pmd, entry);
-		update_mmu_cache_pmd(vma, address, pmd);
+		set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
+		update_mmu_cache_pmd(vma, fe->address, fe->pmd);
 		if (!page) {
-			add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
+			add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
 			put_huge_zero_page();
 		} else {
 			VM_BUG_ON_PAGE(!PageHead(page), page);
@@ -1423,13 +1403,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 		ret |= VM_FAULT_WRITE;
 	}
-	spin_unlock(ptl);
+	spin_unlock(fe->ptl);
 out_mn:
-	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 out:
 	return ret;
 out_unlock:
-	spin_unlock(ptl);
+	spin_unlock(fe->ptl);
 	return ret;
 }
 
@@ -1489,13 +1469,12 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 }
 
 /* NUMA hinting page fault entry point for trans huge pmds */
-int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
+int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
 {
-	spinlock_t *ptl;
+	struct vm_area_struct *vma = fe->vma;
 	struct anon_vma *anon_vma = NULL;
 	struct page *page;
-	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	unsigned long haddr = fe->address & HPAGE_PMD_MASK;
 	int page_nid = -1, this_nid = numa_node_id();
 	int target_nid, last_cpupid = -1;
 	bool page_locked;
@@ -1506,8 +1485,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* A PROT_NONE fault should not end up here */
 	BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
 
-	ptl = pmd_lock(mm, pmdp);
-	if (unlikely(!pmd_same(pmd, *pmdp)))
+	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
+	if (unlikely(!pmd_same(pmd, *fe->pmd)))
 		goto out_unlock;
 
 	/*
@@ -1515,9 +1494,9 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * without disrupting NUMA hinting information. Do not relock and
 	 * check_same as the page may no longer be mapped.
 	 */
-	if (unlikely(pmd_trans_migrating(*pmdp))) {
-		page = pmd_page(*pmdp);
-		spin_unlock(ptl);
+	if (unlikely(pmd_trans_migrating(*fe->pmd))) {
+		page = pmd_page(*fe->pmd);
+		spin_unlock(fe->ptl);
 		wait_on_page_locked(page);
 		goto out;
 	}
@@ -1550,7 +1529,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/* Migration could have started since the pmd_trans_migrating check */
 	if (!page_locked) {
-		spin_unlock(ptl);
+		spin_unlock(fe->ptl);
 		wait_on_page_locked(page);
 		page_nid = -1;
 		goto out;
@@ -1561,12 +1540,12 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * to serialises splits
 	 */
 	get_page(page);
-	spin_unlock(ptl);
+	spin_unlock(fe->ptl);
 	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PMD did not change while page_table_lock was released */
-	spin_lock(ptl);
-	if (unlikely(!pmd_same(pmd, *pmdp))) {
+	spin_lock(fe->ptl);
+	if (unlikely(!pmd_same(pmd, *fe->pmd))) {
 		unlock_page(page);
 		put_page(page);
 		page_nid = -1;
@@ -1584,9 +1563,9 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * Migrate the THP to the requested node, returns with page unlocked
 	 * and access rights restored.
 	 */
-	spin_unlock(ptl);
-	migrated = migrate_misplaced_transhuge_page(mm, vma,
-				pmdp, pmd, addr, page, target_nid);
+	spin_unlock(fe->ptl);
+	migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
+				fe->pmd, pmd, fe->address, page, target_nid);
 	if (migrated) {
 		flags |= TNF_MIGRATED;
 		page_nid = target_nid;
@@ -1601,18 +1580,18 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	pmd = pmd_mkyoung(pmd);
 	if (was_writable)
 		pmd = pmd_mkwrite(pmd);
-	set_pmd_at(mm, haddr, pmdp, pmd);
-	update_mmu_cache_pmd(vma, addr, pmdp);
+	set_pmd_at(vma->vm_mm, haddr, fe->pmd, pmd);
+	update_mmu_cache_pmd(vma, fe->address, fe->pmd);
 	unlock_page(page);
 out_unlock:
-	spin_unlock(ptl);
+	spin_unlock(fe->ptl);
 
 out:
 	if (anon_vma)
 		page_unlock_anon_vma_read(anon_vma);
 
 	if (page_nid != -1)
-		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);
+		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, fe->flags);
 
 	return 0;
 }
@@ -2413,20 +2392,23 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 					struct vm_area_struct *vma,
 					unsigned long address, pmd_t *pmd)
 {
-	unsigned long _address;
-	pte_t *pte, pteval;
+	pte_t pteval;
 	int swapped_in = 0, ret = 0;
+	struct fault_env fe = {
+		.vma = vma,
+		.address = address,
+		.flags = FAULT_FLAG_ALLOW_RETRY,
+		.pmd = pmd,
+	};
 
-	pte = pte_offset_map(pmd, address);
-	for (_address = address; _address < address + HPAGE_PMD_NR*PAGE_SIZE;
-	     pte++, _address += PAGE_SIZE) {
-		pteval = *pte;
+	fe.pte = pte_offset_map(pmd, address);
+	for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE;
+			fe.pte++, fe.address += PAGE_SIZE) {
+		pteval = *fe.pte;
 		if (!is_swap_pte(pteval))
 			continue;
 		swapped_in++;
-		ret = do_swap_page(mm, vma, _address, pte, pmd,
-				   FAULT_FLAG_ALLOW_RETRY,
-				   pteval);
+		ret = do_swap_page(&fe, pteval);
 		/* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
 		if (ret & VM_FAULT_RETRY) {
 			down_read(&mm->mmap_sem);
@@ -2442,10 +2424,10 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 			return false;
 		}
 		/* pte is unmapped now, we need to map it */
-		pte = pte_offset_map(pmd, _address);
+		fe.pte = pte_offset_map(pmd, fe.address);
 	}
-	pte--;
-	pte_unmap(pte);
+	fe.pte--;
+	pte_unmap(fe.pte);
 	trace_mm_collapse_huge_page_swapin(mm, swapped_in, 1);
 	return true;
 }
@@ -36,9 +36,7 @@
 /* Do not use these with a slab allocator */
 #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
 
-extern int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long address, pte_t *page_table, pmd_t *pmd,
-			unsigned int flags, pte_t orig_pte);
+int do_swap_page(struct fault_env *fe, pte_t orig_pte);
 
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
mm/memory.c | 582
(File diff suppressed because it is too large.)
@@ -1809,7 +1809,8 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 }
 EXPORT_SYMBOL(filemap_fault);
 
-void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
+void filemap_map_pages(struct fault_env *fe,
+		pgoff_t start_pgoff, pgoff_t end_pgoff)
 {
 	BUG();
 }