Mirror of https://github.com/torvalds/linux.git, synced 2025-11-04 10:40:15 +02:00
	mm/mempolicy: use vma iterator & maple state instead of vma linked list
Reworked the way mbind_range() finds the first VMA to reuse the maple state and limit the number of tree walks needed.

Note, this drops the VM_BUG_ON(!vma) call, which would catch a start address higher than the last VMA. The code was written in a way that allowed no VMA updates to occur and still return success. There should be no functional change to this scenario with the new code.

Link: https://lkml.kernel.org/r/20220906194824.2110408-57-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Tested-by: Yu Zhao <yuzhao@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
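For readers unfamiliar with the new lookup, the shape of the rework is roughly this: a single maple state is seeded just below start, used once to locate the first VMA, and then reused to drive the iteration, so no further find_vma() tree walks are needed. The following is a minimal sketch of that pattern under the maple tree API assumed from this patch series (MA_STATE(), mas_find_rev(), mas_next()); walk_policy_range() is a hypothetical name for illustration, not code from this commit:

/*
 * Sketch only, not code from this commit: how mbind_range()-style code can
 * locate its first VMA and keep iterating from one maple state.
 * walk_policy_range() is a hypothetical helper; MA_STATE(), mas_find_rev()
 * and mas_next() are the maple tree API assumed from this patch series.
 */
static int walk_policy_range(struct mm_struct *mm, unsigned long start,
			     unsigned long end)
{
	MA_STATE(mas, &mm->mm_mt, start - 1, start - 1);
	struct vm_area_struct *vma, *prev;

	/* One tree descent finds the VMA at or before 'start'. */
	prev = mas_find_rev(&mas, 0);
	if (prev && start < prev->vm_end)
		vma = prev;			/* 'start' lies inside prev */
	else
		vma = mas_next(&mas, end - 1);	/* otherwise take the next VMA in range */

	/* The same maple state drives the rest of the walk; no find_vma() calls. */
	for (; vma; vma = mas_next(&mas, end - 1)) {
		/* ... apply the change to [max(start, vma->vm_start), min(end, vma->vm_end)) ... */
	}

	return 0;
}

Callers that modify the tree mid-walk, as mbind_range() does via split_vma() and vma_merge(), must also call mas_pause() before resuming, which is why the hunks below add mas_pause() after every operation that invalidates the state.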
This commit is contained in:

parent ba0aff8ea6
commit 66850be55e

1 changed file with 32 additions and 24 deletions
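Most of the diff below is the mechanical half of the conversion: straight-line walks over mm->mmap and vma->vm_next become VMA iterator loops, while mbind_range() additionally reuses its maple state as sketched above. A minimal illustration of the iterator pattern, assuming the VMA_ITERATOR()/for_each_vma() API from this series (walk_all_vmas() is a hypothetical name, not code from this commit):

/*
 * Sketch only: the linked-list walk removed by this series versus the
 * iterator form that replaces it.  walk_all_vmas() is hypothetical;
 * VMA_ITERATOR() and for_each_vma() are assumed from this patch series.
 */
static void walk_all_vmas(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);	/* start iterating at address 0 */

	mmap_read_lock(mm);
	/* Old form: for (vma = mm->mmap; vma; vma = vma->vm_next) */
	for_each_vma(vmi, vma) {
		/* ... per-VMA work ... */
	}
	mmap_read_unlock(mm);
}

Range-limited walks use for_each_vma_range(vmi, vma, end) instead, as in the set_mempolicy_home_node() hunk at the bottom of the diff.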
				
			
mm/mempolicy.c

@@ -381,9 +381,10 @@ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
 void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 {
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
 
 	mmap_write_lock(mm);
-	for (vma = mm->mmap; vma; vma = vma->vm_next)
+	for_each_vma(vmi, vma)
 		mpol_rebind_policy(vma->vm_policy, new);
 	mmap_write_unlock(mm);
 }
@@ -654,7 +655,7 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
 static int queue_pages_test_walk(unsigned long start, unsigned long end,
 				struct mm_walk *walk)
 {
-	struct vm_area_struct *vma = walk->vma;
+	struct vm_area_struct *next, *vma = walk->vma;
 	struct queue_pages *qp = walk->private;
 	unsigned long endvma = vma->vm_end;
 	unsigned long flags = qp->flags;
@@ -669,9 +670,10 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 			/* hole at head side of range */
 			return -EFAULT;
 	}
+	next = find_vma(vma->vm_mm, vma->vm_end);
 	if (!(flags & MPOL_MF_DISCONTIG_OK) &&
 		((vma->vm_end < qp->end) &&
-		(!vma->vm_next || vma->vm_end < vma->vm_next->vm_start)))
+		(!next || vma->vm_end < next->vm_start)))
 		/* hole at middle or tail of range */
 		return -EFAULT;
 
@@ -785,26 +787,24 @@ static int vma_replace_policy(struct vm_area_struct *vma,
 static int mbind_range(struct mm_struct *mm, unsigned long start,
 		       unsigned long end, struct mempolicy *new_pol)
 {
+	MA_STATE(mas, &mm->mm_mt, start - 1, start - 1);
 	struct vm_area_struct *prev;
 	struct vm_area_struct *vma;
 	int err = 0;
 	pgoff_t pgoff;
-	unsigned long vmstart;
-	unsigned long vmend;
 
-	vma = find_vma(mm, start);
-	VM_BUG_ON(!vma);
+	prev = mas_find_rev(&mas, 0);
+	if (prev && (start < prev->vm_end))
+		vma = prev;
+	else
+		vma = mas_next(&mas, end - 1);
 
-	prev = vma->vm_prev;
-	if (start > vma->vm_start)
-		prev = vma;
-
-	for (; vma && vma->vm_start < end; prev = vma, vma = vma->vm_next) {
-		vmstart = max(start, vma->vm_start);
-		vmend   = min(end, vma->vm_end);
+	for (; vma; vma = mas_next(&mas, end - 1)) {
+		unsigned long vmstart = max(start, vma->vm_start);
+		unsigned long vmend = min(end, vma->vm_end);
 
 		if (mpol_equal(vma_policy(vma), new_pol))
-			continue;
+			goto next;
 
 		pgoff = vma->vm_pgoff +
 			((vmstart - vma->vm_start) >> PAGE_SHIFT);
@@ -813,6 +813,8 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 				 new_pol, vma->vm_userfaultfd_ctx,
 				 anon_vma_name(vma));
 		if (prev) {
+			/* vma_merge() invalidated the mas */
+			mas_pause(&mas);
 			vma = prev;
 			goto replace;
 		}
@@ -820,19 +822,25 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 			err = split_vma(vma->vm_mm, vma, vmstart, 1);
 			if (err)
 				goto out;
+			/* split_vma() invalidated the mas */
+			mas_pause(&mas);
 		}
 		if (vma->vm_end != vmend) {
 			err = split_vma(vma->vm_mm, vma, vmend, 0);
 			if (err)
 				goto out;
+			/* split_vma() invalidated the mas */
+			mas_pause(&mas);
 		}
- replace:
+replace:
 		err = vma_replace_policy(vma, new_pol);
 		if (err)
 			goto out;
+next:
+		prev = vma;
 	}
 
- out:
+out:
 	return err;
 }
 
@@ -1049,6 +1057,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 			   int flags)
 {
 	nodemask_t nmask;
+	struct vm_area_struct *vma;
 	LIST_HEAD(pagelist);
 	int err = 0;
 	struct migration_target_control mtc = {
@@ -1064,8 +1073,9 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 	 * need migration.  Between passing in the full user address
 	 * space range and MPOL_MF_DISCONTIG_OK, this call can not fail.
	 */
+	vma = find_vma(mm, 0);
 	VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
-	queue_pages_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
+	queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask,
 			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
 
 	if (!list_empty(&pagelist)) {
@@ -1195,14 +1205,13 @@ static struct page *new_page(struct page *page, unsigned long start)
 	struct folio *dst, *src = page_folio(page);
 	struct vm_area_struct *vma;
 	unsigned long address;
+	VMA_ITERATOR(vmi, current->mm, start);
 	gfp_t gfp = GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL;
 
-	vma = find_vma(current->mm, start);
-	while (vma) {
+	for_each_vma(vmi, vma) {
 		address = page_address_in_vma(page, vma);
 		if (address != -EFAULT)
 			break;
-		vma = vma->vm_next;
 	}
 
 	if (folio_test_hugetlb(src))
@@ -1480,6 +1489,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
 	unsigned long vmend;
 	unsigned long end;
 	int err = -ENOENT;
+	VMA_ITERATOR(vmi, mm, start);
 
 	start = untagged_addr(start);
 	if (start & ~PAGE_MASK)
@@ -1505,9 +1515,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
 	if (end == start)
 		return 0;
 	mmap_write_lock(mm);
-	vma = find_vma(mm, start);
-	for (; vma && vma->vm_start < end;  vma = vma->vm_next) {
-
+	for_each_vma_range(vmi, vma, end) {
 		vmstart = max(start, vma->vm_start);
 		vmend   = min(end, vma->vm_end);
 		new = mpol_dup(vma_policy(vma));