forked from mirrors/linux
		
	mempolicy: apply page table walker on queue_pages_range()
queue_pages_range() currently does page table walking in its own way, which results in some code duplication. This patch applies the page table walker to reduce the number of lines of code. queue_pages_range() has to do some prechecks to determine whether we really walk over the vma or just skip it. Now that we have the test_walk() callback in mm_walk for this purpose, we can do this replacement cleanly. queue_pages_test_walk() depends not only on the current vma but also on the previous one, so queue_pages->prev is introduced to remember it. Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Cyrill Gorcunov <gorcunov@openvz.org> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Pavel Emelyanov <xemul@parallels.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									1757bbd9c5
								
							
						
					
					
						commit
						6f4576e368
					
				
					 1 changed files with 90 additions and 134 deletions
				
			
		
							
								
								
									
										224
									
								
								mm/mempolicy.c
									
									
									
									
									
								
							
							
						
						
									
										224
									
								
								mm/mempolicy.c
									
									
									
									
									
								
							| 
						 | 
					@ -471,24 +471,34 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
 | 
				
			||||||
static void migrate_page_add(struct page *page, struct list_head *pagelist,
 | 
					static void migrate_page_add(struct page *page, struct list_head *pagelist,
 | 
				
			||||||
				unsigned long flags);
 | 
									unsigned long flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct queue_pages {
 | 
				
			||||||
 | 
						struct list_head *pagelist;
 | 
				
			||||||
 | 
						unsigned long flags;
 | 
				
			||||||
 | 
						nodemask_t *nmask;
 | 
				
			||||||
 | 
						struct vm_area_struct *prev;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Scan through pages checking if pages follow certain conditions,
 | 
					 * Scan through pages checking if pages follow certain conditions,
 | 
				
			||||||
 * and move them to the pagelist if they do.
 | 
					 * and move them to the pagelist if they do.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 | 
					static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 | 
				
			||||||
		unsigned long addr, unsigned long end,
 | 
								unsigned long end, struct mm_walk *walk)
 | 
				
			||||||
		const nodemask_t *nodes, unsigned long flags,
 | 
					 | 
				
			||||||
		void *private)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	pte_t *orig_pte;
 | 
						struct vm_area_struct *vma = walk->vma;
 | 
				
			||||||
 | 
						struct page *page;
 | 
				
			||||||
 | 
						struct queue_pages *qp = walk->private;
 | 
				
			||||||
 | 
						unsigned long flags = qp->flags;
 | 
				
			||||||
 | 
						int nid;
 | 
				
			||||||
	pte_t *pte;
 | 
						pte_t *pte;
 | 
				
			||||||
	spinlock_t *ptl;
 | 
						spinlock_t *ptl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 | 
						split_huge_page_pmd(vma, addr, pmd);
 | 
				
			||||||
	do {
 | 
						if (pmd_trans_unstable(pmd))
 | 
				
			||||||
		struct page *page;
 | 
							return 0;
 | 
				
			||||||
		int nid;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
 | 
				
			||||||
 | 
						for (; addr != end; pte++, addr += PAGE_SIZE) {
 | 
				
			||||||
		if (!pte_present(*pte))
 | 
							if (!pte_present(*pte))
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
		page = vm_normal_page(vma, addr, *pte);
 | 
							page = vm_normal_page(vma, addr, *pte);
 | 
				
			||||||
| 
						 | 
					@ -501,114 +511,46 @@ static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 | 
				
			||||||
		if (PageReserved(page))
 | 
							if (PageReserved(page))
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
		nid = page_to_nid(page);
 | 
							nid = page_to_nid(page);
 | 
				
			||||||
		if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
 | 
							if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
 | 
							if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
 | 
				
			||||||
			migrate_page_add(page, private, flags);
 | 
								migrate_page_add(page, qp->pagelist, flags);
 | 
				
			||||||
		else
 | 
						}
 | 
				
			||||||
			break;
 | 
						pte_unmap_unlock(pte - 1, ptl);
 | 
				
			||||||
	} while (pte++, addr += PAGE_SIZE, addr != end);
 | 
						cond_resched();
 | 
				
			||||||
	pte_unmap_unlock(orig_pte, ptl);
 | 
						return 0;
 | 
				
			||||||
	return addr != end;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
 | 
					static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
 | 
				
			||||||
		pmd_t *pmd, const nodemask_t *nodes, unsigned long flags,
 | 
								       unsigned long addr, unsigned long end,
 | 
				
			||||||
				    void *private)
 | 
								       struct mm_walk *walk)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
#ifdef CONFIG_HUGETLB_PAGE
 | 
					#ifdef CONFIG_HUGETLB_PAGE
 | 
				
			||||||
 | 
						struct queue_pages *qp = walk->private;
 | 
				
			||||||
 | 
						unsigned long flags = qp->flags;
 | 
				
			||||||
	int nid;
 | 
						int nid;
 | 
				
			||||||
	struct page *page;
 | 
						struct page *page;
 | 
				
			||||||
	spinlock_t *ptl;
 | 
						spinlock_t *ptl;
 | 
				
			||||||
	pte_t entry;
 | 
						pte_t entry;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd);
 | 
						ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
 | 
				
			||||||
	entry = huge_ptep_get((pte_t *)pmd);
 | 
						entry = huge_ptep_get(pte);
 | 
				
			||||||
	if (!pte_present(entry))
 | 
						if (!pte_present(entry))
 | 
				
			||||||
		goto unlock;
 | 
							goto unlock;
 | 
				
			||||||
	page = pte_page(entry);
 | 
						page = pte_page(entry);
 | 
				
			||||||
	nid = page_to_nid(page);
 | 
						nid = page_to_nid(page);
 | 
				
			||||||
	if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
 | 
						if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
 | 
				
			||||||
		goto unlock;
 | 
							goto unlock;
 | 
				
			||||||
	/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
 | 
						/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
 | 
				
			||||||
	if (flags & (MPOL_MF_MOVE_ALL) ||
 | 
						if (flags & (MPOL_MF_MOVE_ALL) ||
 | 
				
			||||||
	    (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
 | 
						    (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
 | 
				
			||||||
		isolate_huge_page(page, private);
 | 
							isolate_huge_page(page, qp->pagelist);
 | 
				
			||||||
unlock:
 | 
					unlock:
 | 
				
			||||||
	spin_unlock(ptl);
 | 
						spin_unlock(ptl);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
	BUG();
 | 
						BUG();
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static inline int queue_pages_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 | 
					 | 
				
			||||||
		unsigned long addr, unsigned long end,
 | 
					 | 
				
			||||||
		const nodemask_t *nodes, unsigned long flags,
 | 
					 | 
				
			||||||
		void *private)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	pmd_t *pmd;
 | 
					 | 
				
			||||||
	unsigned long next;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	pmd = pmd_offset(pud, addr);
 | 
					 | 
				
			||||||
	do {
 | 
					 | 
				
			||||||
		next = pmd_addr_end(addr, end);
 | 
					 | 
				
			||||||
		if (!pmd_present(*pmd))
 | 
					 | 
				
			||||||
			continue;
 | 
					 | 
				
			||||||
		if (pmd_huge(*pmd) && is_vm_hugetlb_page(vma)) {
 | 
					 | 
				
			||||||
			queue_pages_hugetlb_pmd_range(vma, pmd, nodes,
 | 
					 | 
				
			||||||
						flags, private);
 | 
					 | 
				
			||||||
			continue;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		split_huge_page_pmd(vma, addr, pmd);
 | 
					 | 
				
			||||||
		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
 | 
					 | 
				
			||||||
			continue;
 | 
					 | 
				
			||||||
		if (queue_pages_pte_range(vma, pmd, addr, next, nodes,
 | 
					 | 
				
			||||||
				    flags, private))
 | 
					 | 
				
			||||||
			return -EIO;
 | 
					 | 
				
			||||||
	} while (pmd++, addr = next, addr != end);
 | 
					 | 
				
			||||||
	return 0;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static inline int queue_pages_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 | 
					 | 
				
			||||||
		unsigned long addr, unsigned long end,
 | 
					 | 
				
			||||||
		const nodemask_t *nodes, unsigned long flags,
 | 
					 | 
				
			||||||
		void *private)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	pud_t *pud;
 | 
					 | 
				
			||||||
	unsigned long next;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	pud = pud_offset(pgd, addr);
 | 
					 | 
				
			||||||
	do {
 | 
					 | 
				
			||||||
		next = pud_addr_end(addr, end);
 | 
					 | 
				
			||||||
		if (pud_huge(*pud) && is_vm_hugetlb_page(vma))
 | 
					 | 
				
			||||||
			continue;
 | 
					 | 
				
			||||||
		if (pud_none_or_clear_bad(pud))
 | 
					 | 
				
			||||||
			continue;
 | 
					 | 
				
			||||||
		if (queue_pages_pmd_range(vma, pud, addr, next, nodes,
 | 
					 | 
				
			||||||
				    flags, private))
 | 
					 | 
				
			||||||
			return -EIO;
 | 
					 | 
				
			||||||
	} while (pud++, addr = next, addr != end);
 | 
					 | 
				
			||||||
	return 0;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static inline int queue_pages_pgd_range(struct vm_area_struct *vma,
 | 
					 | 
				
			||||||
		unsigned long addr, unsigned long end,
 | 
					 | 
				
			||||||
		const nodemask_t *nodes, unsigned long flags,
 | 
					 | 
				
			||||||
		void *private)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	pgd_t *pgd;
 | 
					 | 
				
			||||||
	unsigned long next;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	pgd = pgd_offset(vma->vm_mm, addr);
 | 
					 | 
				
			||||||
	do {
 | 
					 | 
				
			||||||
		next = pgd_addr_end(addr, end);
 | 
					 | 
				
			||||||
		if (pgd_none_or_clear_bad(pgd))
 | 
					 | 
				
			||||||
			continue;
 | 
					 | 
				
			||||||
		if (queue_pages_pud_range(vma, pgd, addr, next, nodes,
 | 
					 | 
				
			||||||
				    flags, private))
 | 
					 | 
				
			||||||
			return -EIO;
 | 
					 | 
				
			||||||
	} while (pgd++, addr = next, addr != end);
 | 
					 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -641,6 +583,46 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif /* CONFIG_NUMA_BALANCING */
 | 
					#endif /* CONFIG_NUMA_BALANCING */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int queue_pages_test_walk(unsigned long start, unsigned long end,
 | 
				
			||||||
 | 
									struct mm_walk *walk)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct vm_area_struct *vma = walk->vma;
 | 
				
			||||||
 | 
						struct queue_pages *qp = walk->private;
 | 
				
			||||||
 | 
						unsigned long endvma = vma->vm_end;
 | 
				
			||||||
 | 
						unsigned long flags = qp->flags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (endvma > end)
 | 
				
			||||||
 | 
							endvma = end;
 | 
				
			||||||
 | 
						if (vma->vm_start > start)
 | 
				
			||||||
 | 
							start = vma->vm_start;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!(flags & MPOL_MF_DISCONTIG_OK)) {
 | 
				
			||||||
 | 
							if (!vma->vm_next && vma->vm_end < end)
 | 
				
			||||||
 | 
								return -EFAULT;
 | 
				
			||||||
 | 
							if (qp->prev && qp->prev->vm_end < vma->vm_start)
 | 
				
			||||||
 | 
								return -EFAULT;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						qp->prev = vma;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (vma->vm_flags & VM_PFNMAP)
 | 
				
			||||||
 | 
							return 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (flags & MPOL_MF_LAZY) {
 | 
				
			||||||
 | 
							/* Similar to task_numa_work, skip inaccessible VMAs */
 | 
				
			||||||
 | 
							if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
 | 
				
			||||||
 | 
								change_prot_numa(vma, start, endvma);
 | 
				
			||||||
 | 
							return 1;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if ((flags & MPOL_MF_STRICT) ||
 | 
				
			||||||
 | 
						    ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
 | 
				
			||||||
 | 
						     vma_migratable(vma)))
 | 
				
			||||||
 | 
							/* queue pages from current vma */
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
						return 1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Walk through page tables and collect pages to be migrated.
 | 
					 * Walk through page tables and collect pages to be migrated.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
| 
						 | 
					@ -650,50 +632,24 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static int
 | 
					static int
 | 
				
			||||||
queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 | 
					queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 | 
				
			||||||
		const nodemask_t *nodes, unsigned long flags, void *private)
 | 
							nodemask_t *nodes, unsigned long flags,
 | 
				
			||||||
 | 
							struct list_head *pagelist)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int err = 0;
 | 
						struct queue_pages qp = {
 | 
				
			||||||
	struct vm_area_struct *vma, *prev;
 | 
							.pagelist = pagelist,
 | 
				
			||||||
 | 
							.flags = flags,
 | 
				
			||||||
 | 
							.nmask = nodes,
 | 
				
			||||||
 | 
							.prev = NULL,
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
						struct mm_walk queue_pages_walk = {
 | 
				
			||||||
 | 
							.hugetlb_entry = queue_pages_hugetlb,
 | 
				
			||||||
 | 
							.pmd_entry = queue_pages_pte_range,
 | 
				
			||||||
 | 
							.test_walk = queue_pages_test_walk,
 | 
				
			||||||
 | 
							.mm = mm,
 | 
				
			||||||
 | 
							.private = &qp,
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	vma = find_vma(mm, start);
 | 
						return walk_page_range(start, end, &queue_pages_walk);
 | 
				
			||||||
	if (!vma)
 | 
					 | 
				
			||||||
		return -EFAULT;
 | 
					 | 
				
			||||||
	prev = NULL;
 | 
					 | 
				
			||||||
	for (; vma && vma->vm_start < end; vma = vma->vm_next) {
 | 
					 | 
				
			||||||
		unsigned long endvma = vma->vm_end;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		if (endvma > end)
 | 
					 | 
				
			||||||
			endvma = end;
 | 
					 | 
				
			||||||
		if (vma->vm_start > start)
 | 
					 | 
				
			||||||
			start = vma->vm_start;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		if (!(flags & MPOL_MF_DISCONTIG_OK)) {
 | 
					 | 
				
			||||||
			if (!vma->vm_next && vma->vm_end < end)
 | 
					 | 
				
			||||||
				return -EFAULT;
 | 
					 | 
				
			||||||
			if (prev && prev->vm_end < vma->vm_start)
 | 
					 | 
				
			||||||
				return -EFAULT;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		if (flags & MPOL_MF_LAZY) {
 | 
					 | 
				
			||||||
			/* Similar to task_numa_work, skip inaccessible VMAs */
 | 
					 | 
				
			||||||
			if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
 | 
					 | 
				
			||||||
				change_prot_numa(vma, start, endvma);
 | 
					 | 
				
			||||||
			goto next;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		if ((flags & MPOL_MF_STRICT) ||
 | 
					 | 
				
			||||||
		     ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
 | 
					 | 
				
			||||||
		      vma_migratable(vma))) {
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			err = queue_pages_pgd_range(vma, start, endvma, nodes,
 | 
					 | 
				
			||||||
						flags, private);
 | 
					 | 
				
			||||||
			if (err)
 | 
					 | 
				
			||||||
				break;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
next:
 | 
					 | 
				
			||||||
		prev = vma;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return err;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue