mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	mm: pagewalk: fix misbehavior of walk_page_range for vma(VM_PFNMAP)
walk_page_range() silently skips any vma having VM_PFNMAP set, which leads to undesirable behaviour at the client end (the caller of walk_page_range). For example, for pagemap_read(), when no callbacks are called against a VM_PFNMAP vma, pagemap_read() may prepare pagemap data for the next virtual address range at the wrong index. That could confuse and/or break userspace applications. This patch avoids this misbehavior caused by vma(VM_PFNMAP) as follows: - for pagemap_read(), which has its own ->pte_hole(), call the ->pte_hole() over vma(VM_PFNMAP), - for clear_refs and queue_pages, which have their own ->test_walk, just return 1 and skip vma(VM_PFNMAP). This is no problem because these are not interested in hole regions, - for other callers, just skip the vma(VM_PFNMAP) as the default behavior. Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Signed-off-by: Shiraz Hashim <shashim@codeaurora.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									6f4576e368
								
							
						
					
					
						commit
						48684a65b4
					
				
					 3 changed files with 19 additions and 8 deletions
				
			
		| 
						 | 
					@ -806,6 +806,9 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
 | 
				
			||||||
	struct clear_refs_private *cp = walk->private;
 | 
						struct clear_refs_private *cp = walk->private;
 | 
				
			||||||
	struct vm_area_struct *vma = walk->vma;
 | 
						struct vm_area_struct *vma = walk->vma;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (vma->vm_flags & VM_PFNMAP)
 | 
				
			||||||
 | 
							return 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Writing 1 to /proc/pid/clear_refs affects all pages.
 | 
						 * Writing 1 to /proc/pid/clear_refs affects all pages.
 | 
				
			||||||
	 * Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
 | 
						 * Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -591,6 +591,9 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 | 
				
			||||||
	unsigned long endvma = vma->vm_end;
 | 
						unsigned long endvma = vma->vm_end;
 | 
				
			||||||
	unsigned long flags = qp->flags;
 | 
						unsigned long flags = qp->flags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (vma->vm_flags & VM_PFNMAP)
 | 
				
			||||||
 | 
							return 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (endvma > end)
 | 
						if (endvma > end)
 | 
				
			||||||
		endvma = end;
 | 
							endvma = end;
 | 
				
			||||||
	if (vma->vm_start > start)
 | 
						if (vma->vm_start > start)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -35,7 +35,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 | 
				
			||||||
	do {
 | 
						do {
 | 
				
			||||||
again:
 | 
					again:
 | 
				
			||||||
		next = pmd_addr_end(addr, end);
 | 
							next = pmd_addr_end(addr, end);
 | 
				
			||||||
		if (pmd_none(*pmd)) {
 | 
							if (pmd_none(*pmd) || !walk->vma) {
 | 
				
			||||||
			if (walk->pte_hole)
 | 
								if (walk->pte_hole)
 | 
				
			||||||
				err = walk->pte_hole(addr, next, walk);
 | 
									err = walk->pte_hole(addr, next, walk);
 | 
				
			||||||
			if (err)
 | 
								if (err)
 | 
				
			||||||
| 
						 | 
					@ -165,9 +165,6 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
 | 
				
			||||||
 * or skip it via the returned value. Return 0 if we do walk over the
 | 
					 * or skip it via the returned value. Return 0 if we do walk over the
 | 
				
			||||||
 * current vma, and return 1 if we skip the vma. Negative values means
 | 
					 * current vma, and return 1 if we skip the vma. Negative values means
 | 
				
			||||||
 * error, where we abort the current walk.
 | 
					 * error, where we abort the current walk.
 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * Default check (only VM_PFNMAP check for now) is used when the caller
 | 
					 | 
				
			||||||
 * doesn't define test_walk() callback.
 | 
					 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static int walk_page_test(unsigned long start, unsigned long end,
 | 
					static int walk_page_test(unsigned long start, unsigned long end,
 | 
				
			||||||
			struct mm_walk *walk)
 | 
								struct mm_walk *walk)
 | 
				
			||||||
| 
						 | 
					@ -178,11 +175,19 @@ static int walk_page_test(unsigned long start, unsigned long end,
 | 
				
			||||||
		return walk->test_walk(start, end, walk);
 | 
							return walk->test_walk(start, end, walk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Do not walk over vma(VM_PFNMAP), because we have no valid struct
 | 
						 * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP
 | 
				
			||||||
	 * page backing a VM_PFNMAP range. See also commit a9ff785e4437.
 | 
						 * range, so we don't walk over it as we do for normal vmas. However,
 | 
				
			||||||
 | 
						 * Some callers are interested in handling hole range and they don't
 | 
				
			||||||
 | 
						 * want to just ignore any single address range. Such users certainly
 | 
				
			||||||
 | 
						 * define their ->pte_hole() callbacks, so let's delegate them to handle
 | 
				
			||||||
 | 
						 * vma(VM_PFNMAP).
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (vma->vm_flags & VM_PFNMAP)
 | 
						if (vma->vm_flags & VM_PFNMAP) {
 | 
				
			||||||
		return 1;
 | 
							int err = 1;
 | 
				
			||||||
 | 
							if (walk->pte_hole)
 | 
				
			||||||
 | 
								err = walk->pte_hole(start, end, walk);
 | 
				
			||||||
 | 
							return err ? err : 1;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue