	mm: isolate mmap internal logic to mm/vma.c
In previous commits we effected improvements to the mmap() logic in mmap_region() and its newly introduced internal implementation function __mmap_region().

However, as these changes are intended to be backported, we kept the delta as small as possible and made as few changes as possible to the newly introduced mm/vma.* files.

Take the opportunity to move this logic to mm/vma.c, which not only isolates it, but also makes it available for later userland testing, which can help us catch such logic errors far earlier.

Link: https://lkml.kernel.org/r/93fc2c3aa37dd30590b7e4ee067dfd832007bf7e.1729858176.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
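The userspace-visible behaviour implemented by the moved __mmap_region() can be exercised with an ordinary mmap() test: a MAP_FIXED request over an existing mapping must first unmap whatever occupies that range, then install the new VMA. Below is a minimal sketch (not part of this patch; the two-page size and the assertions are illustrative assumptions):

/* Userspace sketch: map MAP_FIXED over an existing anonymous mapping and
 * check that the old contents are gone. This is the path the kernel handles
 * via vms_gather_munmap_vmas()/vms_complete_munmap_vmas() in the diff below.
 */
#include <assert.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);

	/* First mapping: two anonymous, writable pages filled with 0xaa. */
	char *base = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(base != MAP_FAILED);
	memset(base, 0xaa, 2 * page);

	/* MAP_FIXED over the same range: the kernel replaces the old VMAs,
	 * so the region reads back as freshly zero-filled pages. */
	char *again = mmap(base, 2 * page, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
	assert(again == base);
	assert(again[0] == 0 && again[2 * page - 1] == 0);

	return munmap(base, 2 * page);
}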
parent c14f8046cd
commit 52956b0d7f

4 changed files with 329 additions and 330 deletions
							
								
								
									
mm/mmap.c (234 changed lines)
@@ -577,22 +577,6 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
 }
 #endif /* __ARCH_WANT_SYS_OLD_MMAP */
 
-/*
- * We account for memory if it's a private writeable mapping,
- * not hugepages and VM_NORESERVE wasn't set.
- */
-static inline bool accountable_mapping(struct file *file, vm_flags_t vm_flags)
-{
-	/*
-	 * hugetlb has its own accounting separate from the core VM
-	 * VM_HUGETLB may not be set yet so we cannot check for that flag.
-	 */
-	if (file && is_file_hugepages(file))
-		return false;
-
-	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
-}
-
 /**
  * unmapped_area() - Find an area between the low_limit and the high_limit with
  * the correct alignment and offset, all from @info. Note: current->mm is used
@@ -1362,224 +1346,6 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
 	return do_vmi_munmap(&vmi, mm, start, len, uf, false);
 }
 
-static unsigned long __mmap_region(struct file *file, unsigned long addr,
-		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-		struct list_head *uf)
-{
-	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma = NULL;
-	pgoff_t pglen = PHYS_PFN(len);
-	unsigned long charged = 0;
-	struct vma_munmap_struct vms;
-	struct ma_state mas_detach;
-	struct maple_tree mt_detach;
-	unsigned long end = addr + len;
-	int error;
-	VMA_ITERATOR(vmi, mm, addr);
-	VMG_STATE(vmg, mm, &vmi, addr, end, vm_flags, pgoff);
-
-	vmg.file = file;
-	/* Find the first overlapping VMA */
-	vma = vma_find(&vmi, end);
-	init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false);
-	if (vma) {
-		mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
-		mt_on_stack(mt_detach);
-		mas_init(&mas_detach, &mt_detach, /* addr = */ 0);
-		/* Prepare to unmap any existing mapping in the area */
-		error = vms_gather_munmap_vmas(&vms, &mas_detach);
-		if (error)
-			goto gather_failed;
-
-		vmg.next = vms.next;
-		vmg.prev = vms.prev;
-		vma = NULL;
-	} else {
-		vmg.next = vma_iter_next_rewind(&vmi, &vmg.prev);
-	}
-
-	/* Check against address space limit. */
-	if (!may_expand_vm(mm, vm_flags, pglen - vms.nr_pages)) {
-		error = -ENOMEM;
-		goto abort_munmap;
-	}
-
-	/*
-	 * Private writable mapping: check memory availability
-	 */
-	if (accountable_mapping(file, vm_flags)) {
-		charged = pglen;
-		charged -= vms.nr_accounted;
-		if (charged) {
-			error = security_vm_enough_memory_mm(mm, charged);
-			if (error)
-				goto abort_munmap;
-		}
-
-		vms.nr_accounted = 0;
-		vm_flags |= VM_ACCOUNT;
-		vmg.flags = vm_flags;
-	}
-
-	/*
-	 * clear PTEs while the vma is still in the tree so that rmap
-	 * cannot race with the freeing later in the truncate scenario.
-	 * This is also needed for mmap_file(), which is why vm_ops
-	 * close function is called.
-	 */
-	vms_clean_up_area(&vms, &mas_detach);
-	vma = vma_merge_new_range(&vmg);
-	if (vma)
-		goto expanded;
-	/*
-	 * Determine the object being mapped and call the appropriate
-	 * specific mapper. the address has already been validated, but
-	 * not unmapped, but the maps are removed from the list.
-	 */
-	vma = vm_area_alloc(mm);
-	if (!vma) {
-		error = -ENOMEM;
-		goto unacct_error;
-	}
-
-	vma_iter_config(&vmi, addr, end);
-	vma_set_range(vma, addr, end, pgoff);
-	vm_flags_init(vma, vm_flags);
-	vma->vm_page_prot = vm_get_page_prot(vm_flags);
-
-	if (vma_iter_prealloc(&vmi, vma)) {
-		error = -ENOMEM;
-		goto free_vma;
-	}
-
-	if (file) {
-		vma->vm_file = get_file(file);
-		error = mmap_file(file, vma);
-		if (error)
-			goto unmap_and_free_file_vma;
-
-		/* Drivers cannot alter the address of the VMA. */
-		WARN_ON_ONCE(addr != vma->vm_start);
-		/*
-		 * Drivers should not permit writability when previously it was
-		 * disallowed.
-		 */
-		VM_WARN_ON_ONCE(vm_flags != vma->vm_flags &&
-				!(vm_flags & VM_MAYWRITE) &&
-				(vma->vm_flags & VM_MAYWRITE));
-
-		vma_iter_config(&vmi, addr, end);
-		/*
-		 * If vm_flags changed after mmap_file(), we should try merge
-		 * vma again as we may succeed this time.
-		 */
-		if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) {
-			struct vm_area_struct *merge;
-
-			vmg.flags = vma->vm_flags;
-			/* If this fails, state is reset ready for a reattempt. */
-			merge = vma_merge_new_range(&vmg);
-
-			if (merge) {
-				/*
-				 * ->mmap() can change vma->vm_file and fput
-				 * the original file. So fput the vma->vm_file
-				 * here or we would add an extra fput for file
-				 * and cause general protection fault
-				 * ultimately.
-				 */
-				fput(vma->vm_file);
-				vm_area_free(vma);
-				vma = merge;
-				/* Update vm_flags to pick up the change. */
-				vm_flags = vma->vm_flags;
-				goto file_expanded;
-			}
-			vma_iter_config(&vmi, addr, end);
-		}
-
-		vm_flags = vma->vm_flags;
-	} else if (vm_flags & VM_SHARED) {
-		error = shmem_zero_setup(vma);
-		if (error)
-			goto free_iter_vma;
-	} else {
-		vma_set_anonymous(vma);
-	}
-
-#ifdef CONFIG_SPARC64
-	/* TODO: Fix SPARC ADI! */
-	WARN_ON_ONCE(!arch_validate_flags(vm_flags));
-#endif
-
-	/* Lock the VMA since it is modified after insertion into VMA tree */
-	vma_start_write(vma);
-	vma_iter_store(&vmi, vma);
-	mm->map_count++;
-	vma_link_file(vma);
-
-	/*
-	 * vma_merge_new_range() calls khugepaged_enter_vma() too, the below
-	 * call covers the non-merge case.
-	 */
-	khugepaged_enter_vma(vma, vma->vm_flags);
-
-file_expanded:
-	file = vma->vm_file;
-	ksm_add_vma(vma);
-expanded:
-	perf_event_mmap(vma);
-
-	/* Unmap any existing mapping in the area */
-	vms_complete_munmap_vmas(&vms, &mas_detach);
-
-	vm_stat_account(mm, vm_flags, pglen);
-	if (vm_flags & VM_LOCKED) {
-		if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
-					is_vm_hugetlb_page(vma) ||
-					vma == get_gate_vma(current->mm))
-			vm_flags_clear(vma, VM_LOCKED_MASK);
-		else
-			mm->locked_vm += pglen;
-	}
-
-	if (file)
-		uprobe_mmap(vma);
-
-	/*
-	 * New (or expanded) vma always get soft dirty status.
-	 * Otherwise user-space soft-dirty page tracker won't
-	 * be able to distinguish situation when vma area unmapped,
-	 * then new mapped in-place (which must be aimed as
-	 * a completely new data area).
-	 */
-	vm_flags_set(vma, VM_SOFTDIRTY);
-
-	vma_set_page_prot(vma);
-
-	return addr;
-
-unmap_and_free_file_vma:
-	fput(vma->vm_file);
-	vma->vm_file = NULL;
-
-	vma_iter_set(&vmi, vma->vm_end);
-	/* Undo any partial mapping done by a device driver. */
-	unmap_region(&vmi.mas, vma, vmg.prev, vmg.next);
-free_iter_vma:
-	vma_iter_free(&vmi);
-free_vma:
-	vm_area_free(vma);
-unacct_error:
-	if (charged)
-		vm_unacct_memory(charged);
-
-abort_munmap:
-	vms_abort_munmap_vmas(&vms, &mas_detach);
-gather_failed:
-	return error;
-}
-
 unsigned long mmap_region(struct file *file, unsigned long addr,
 			  unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
 			  struct list_head *uf)
							
								
								
									
mm/vma.c (323 changed lines)
@@ -1103,7 +1103,7 @@ static inline void vms_clear_ptes(struct vma_munmap_struct *vms,
 	vms->clear_ptes = false;
 }
 
-void vms_clean_up_area(struct vma_munmap_struct *vms,
+static void vms_clean_up_area(struct vma_munmap_struct *vms,
 		struct ma_state *mas_detach)
 {
 	struct vm_area_struct *vma;
@@ -1126,7 +1126,7 @@ void vms_clean_up_area(struct vma_munmap_struct *vms,
  * used for the munmap() and may downgrade the lock - if requested.  Everything
  * needed to be done once the vma maple tree is updated.
  */
-void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
+static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
 		struct ma_state *mas_detach)
 {
 	struct vm_area_struct *vma;
@@ -1167,6 +1167,23 @@ void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
 	__mt_destroy(mas_detach->tree);
 }
 
+/*
+ * reattach_vmas() - Undo any munmap work and free resources
+ * @mas_detach: The maple state with the detached maple tree
+ *
+ * Reattach any detached vmas and free up the maple tree used to track the vmas.
+ */
+static void reattach_vmas(struct ma_state *mas_detach)
+{
+	struct vm_area_struct *vma;
+
+	mas_set(mas_detach, 0);
+	mas_for_each(mas_detach, vma, ULONG_MAX)
+		vma_mark_detached(vma, false);
+
+	__mt_destroy(mas_detach->tree);
+}
+
 /*
  * vms_gather_munmap_vmas() - Put all VMAs within a range into a maple tree
  * for removal at a later date.  Handles splitting first and last if necessary
@@ -1177,7 +1194,7 @@ void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
  *
  * Return: 0 on success, error otherwise
  */
-int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
+static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
 		struct ma_state *mas_detach)
 {
 	struct vm_area_struct *next = NULL;
@@ -1315,6 +1332,39 @@ int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
 	return error;
 }
 
+/*
+ * init_vma_munmap() - Initializer wrapper for vma_munmap_struct
+ * @vms: The vma munmap struct
+ * @vmi: The vma iterator
+ * @vma: The first vm_area_struct to munmap
+ * @start: The aligned start address to munmap
+ * @end: The aligned end address to munmap
+ * @uf: The userfaultfd list_head
+ * @unlock: Unlock after the operation.  Only unlocked on success
+ */
+static void init_vma_munmap(struct vma_munmap_struct *vms,
+		struct vma_iterator *vmi, struct vm_area_struct *vma,
+		unsigned long start, unsigned long end, struct list_head *uf,
+		bool unlock)
+{
+	vms->vmi = vmi;
+	vms->vma = vma;
+	if (vma) {
+		vms->start = start;
+		vms->end = end;
+	} else {
+		vms->start = vms->end = 0;
+	}
+	vms->unlock = unlock;
+	vms->uf = uf;
+	vms->vma_count = 0;
+	vms->nr_pages = vms->locked_vm = vms->nr_accounted = 0;
+	vms->exec_vm = vms->stack_vm = vms->data_vm = 0;
+	vms->unmap_start = FIRST_USER_ADDRESS;
+	vms->unmap_end = USER_PGTABLES_CEILING;
+	vms->clear_ptes = false;
+}
+
 /*
  * do_vmi_align_munmap() - munmap the aligned region from @start to @end.
  * @vmi: The vma iterator
@@ -2069,3 +2119,270 @@ void mm_drop_all_locks(struct mm_struct *mm)
 
 	mutex_unlock(&mm_all_locks_mutex);
 }
+
+/*
+ * We account for memory if it's a private writeable mapping,
+ * not hugepages and VM_NORESERVE wasn't set.
+ */
+static bool accountable_mapping(struct file *file, vm_flags_t vm_flags)
+{
+	/*
+	 * hugetlb has its own accounting separate from the core VM
+	 * VM_HUGETLB may not be set yet so we cannot check for that flag.
+	 */
+	if (file && is_file_hugepages(file))
+		return false;
+
+	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
+}
+
+/*
+ * vms_abort_munmap_vmas() - Undo as much as possible from an aborted munmap()
+ * operation.
+ * @vms: The vma unmap structure
+ * @mas_detach: The maple state with the detached maple tree
+ *
+ * Reattach any detached vmas, free up the maple tree used to track the vmas.
+ * If that's not possible because the ptes are cleared (and vm_ops->closed() may
+ * have been called), then a NULL is written over the vmas and the vmas are
+ * removed (munmap() completed).
+ */
+static void vms_abort_munmap_vmas(struct vma_munmap_struct *vms,
+		struct ma_state *mas_detach)
+{
+	struct ma_state *mas = &vms->vmi->mas;
+
+	if (!vms->nr_pages)
+		return;
+
+	if (vms->clear_ptes)
+		return reattach_vmas(mas_detach);
+
+	/*
+	 * Aborting cannot just call the vm_ops open() because they are often
+	 * not symmetrical and state data has been lost.  Resort to the old
+	 * failure method of leaving a gap where the MAP_FIXED mapping failed.
+	 */
+	mas_set_range(mas, vms->start, vms->end - 1);
+	mas_store_gfp(mas, NULL, GFP_KERNEL|__GFP_NOFAIL);
+	/* Clean up the insertion of the unfortunate gap */
+	vms_complete_munmap_vmas(vms, mas_detach);
+}
+
+unsigned long __mmap_region(struct file *file, unsigned long addr,
+		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
+		struct list_head *uf)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma = NULL;
+	pgoff_t pglen = PHYS_PFN(len);
+	unsigned long charged = 0;
+	struct vma_munmap_struct vms;
+	struct ma_state mas_detach;
+	struct maple_tree mt_detach;
+	unsigned long end = addr + len;
+	int error;
+	VMA_ITERATOR(vmi, mm, addr);
+	VMG_STATE(vmg, mm, &vmi, addr, end, vm_flags, pgoff);
+
+	vmg.file = file;
+	/* Find the first overlapping VMA */
+	vma = vma_find(&vmi, end);
+	init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false);
+	if (vma) {
+		mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
+		mt_on_stack(mt_detach);
+		mas_init(&mas_detach, &mt_detach, /* addr = */ 0);
+		/* Prepare to unmap any existing mapping in the area */
+		error = vms_gather_munmap_vmas(&vms, &mas_detach);
+		if (error)
+			goto gather_failed;
+
+		vmg.next = vms.next;
+		vmg.prev = vms.prev;
+		vma = NULL;
+	} else {
+		vmg.next = vma_iter_next_rewind(&vmi, &vmg.prev);
+	}
+
+	/* Check against address space limit. */
+	if (!may_expand_vm(mm, vm_flags, pglen - vms.nr_pages)) {
+		error = -ENOMEM;
+		goto abort_munmap;
+	}
+
+	/*
+	 * Private writable mapping: check memory availability
+	 */
+	if (accountable_mapping(file, vm_flags)) {
+		charged = pglen;
+		charged -= vms.nr_accounted;
+		if (charged) {
+			error = security_vm_enough_memory_mm(mm, charged);
+			if (error)
+				goto abort_munmap;
+		}
+
+		vms.nr_accounted = 0;
+		vm_flags |= VM_ACCOUNT;
+		vmg.flags = vm_flags;
+	}
+
+	/*
+	 * clear PTEs while the vma is still in the tree so that rmap
+	 * cannot race with the freeing later in the truncate scenario.
+	 * This is also needed for mmap_file(), which is why vm_ops
+	 * close function is called.
+	 */
+	vms_clean_up_area(&vms, &mas_detach);
+	vma = vma_merge_new_range(&vmg);
+	if (vma)
+		goto expanded;
+	/*
+	 * Determine the object being mapped and call the appropriate
+	 * specific mapper. the address has already been validated, but
+	 * not unmapped, but the maps are removed from the list.
+	 */
+	vma = vm_area_alloc(mm);
+	if (!vma) {
+		error = -ENOMEM;
+		goto unacct_error;
+	}
+
+	vma_iter_config(&vmi, addr, end);
+	vma_set_range(vma, addr, end, pgoff);
+	vm_flags_init(vma, vm_flags);
+	vma->vm_page_prot = vm_get_page_prot(vm_flags);
+
+	if (vma_iter_prealloc(&vmi, vma)) {
+		error = -ENOMEM;
+		goto free_vma;
+	}
+
+	if (file) {
+		vma->vm_file = get_file(file);
+		error = mmap_file(file, vma);
+		if (error)
+			goto unmap_and_free_file_vma;
+
+		/* Drivers cannot alter the address of the VMA. */
+		WARN_ON_ONCE(addr != vma->vm_start);
+		/*
+		 * Drivers should not permit writability when previously it was
+		 * disallowed.
+		 */
+		VM_WARN_ON_ONCE(vm_flags != vma->vm_flags &&
+				!(vm_flags & VM_MAYWRITE) &&
+				(vma->vm_flags & VM_MAYWRITE));
+
+		vma_iter_config(&vmi, addr, end);
+		/*
+		 * If vm_flags changed after mmap_file(), we should try merge
+		 * vma again as we may succeed this time.
+		 */
+		if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) {
+			struct vm_area_struct *merge;
+
+			vmg.flags = vma->vm_flags;
+			/* If this fails, state is reset ready for a reattempt. */
+			merge = vma_merge_new_range(&vmg);
+
+			if (merge) {
+				/*
+				 * ->mmap() can change vma->vm_file and fput
+				 * the original file. So fput the vma->vm_file
+				 * here or we would add an extra fput for file
+				 * and cause general protection fault
+				 * ultimately.
+				 */
+				fput(vma->vm_file);
+				vm_area_free(vma);
+				vma = merge;
+				/* Update vm_flags to pick up the change. */
+				vm_flags = vma->vm_flags;
+				goto file_expanded;
+			}
+			vma_iter_config(&vmi, addr, end);
+		}
+
+		vm_flags = vma->vm_flags;
+	} else if (vm_flags & VM_SHARED) {
+		error = shmem_zero_setup(vma);
+		if (error)
+			goto free_iter_vma;
+	} else {
+		vma_set_anonymous(vma);
+	}
+
+#ifdef CONFIG_SPARC64
+	/* TODO: Fix SPARC ADI! */
+	WARN_ON_ONCE(!arch_validate_flags(vm_flags));
+#endif
+
+	/* Lock the VMA since it is modified after insertion into VMA tree */
+	vma_start_write(vma);
+	vma_iter_store(&vmi, vma);
+	mm->map_count++;
+	vma_link_file(vma);
+
+	/*
+	 * vma_merge_new_range() calls khugepaged_enter_vma() too, the below
+	 * call covers the non-merge case.
+	 */
+	khugepaged_enter_vma(vma, vma->vm_flags);
+
+file_expanded:
+	file = vma->vm_file;
+	ksm_add_vma(vma);
+expanded:
+	perf_event_mmap(vma);
+
+	/* Unmap any existing mapping in the area */
+	vms_complete_munmap_vmas(&vms, &mas_detach);
+
+	vm_stat_account(mm, vm_flags, pglen);
+	if (vm_flags & VM_LOCKED) {
+		if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
+					is_vm_hugetlb_page(vma) ||
+					vma == get_gate_vma(current->mm))
+			vm_flags_clear(vma, VM_LOCKED_MASK);
+		else
+			mm->locked_vm += pglen;
+	}
+
+	if (file)
+		uprobe_mmap(vma);
+
+	/*
+	 * New (or expanded) vma always get soft dirty status.
+	 * Otherwise user-space soft-dirty page tracker won't
+	 * be able to distinguish situation when vma area unmapped,
+	 * then new mapped in-place (which must be aimed as
+	 * a completely new data area).
+	 */
+	vm_flags_set(vma, VM_SOFTDIRTY);
+
+	vma_set_page_prot(vma);
+
+	return addr;
+
+unmap_and_free_file_vma:
+	fput(vma->vm_file);
+	vma->vm_file = NULL;
+
+	vma_iter_set(&vmi, vma->vm_end);
+	/* Undo any partial mapping done by a device driver. */
+	unmap_region(&vmi.mas, vma, vmg.prev, vmg.next);
+free_iter_vma:
+	vma_iter_free(&vmi);
+free_vma:
+	vm_area_free(vma);
+unacct_error:
+	if (charged)
+		vm_unacct_memory(charged);
+
+abort_munmap:
+	vms_abort_munmap_vmas(&vms, &mas_detach);
+gather_failed:
+	return error;
+}
							
								
								
									
mm/vma.h (97 changed lines)
@@ -165,99 +165,6 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
 	return 0;
 }
 
-#ifdef CONFIG_MMU
-/*
- * init_vma_munmap() - Initializer wrapper for vma_munmap_struct
- * @vms: The vma munmap struct
- * @vmi: The vma iterator
- * @vma: The first vm_area_struct to munmap
- * @start: The aligned start address to munmap
- * @end: The aligned end address to munmap
- * @uf: The userfaultfd list_head
- * @unlock: Unlock after the operation.  Only unlocked on success
- */
-static inline void init_vma_munmap(struct vma_munmap_struct *vms,
-		struct vma_iterator *vmi, struct vm_area_struct *vma,
-		unsigned long start, unsigned long end, struct list_head *uf,
-		bool unlock)
-{
-	vms->vmi = vmi;
-	vms->vma = vma;
-	if (vma) {
-		vms->start = start;
-		vms->end = end;
-	} else {
-		vms->start = vms->end = 0;
-	}
-	vms->unlock = unlock;
-	vms->uf = uf;
-	vms->vma_count = 0;
-	vms->nr_pages = vms->locked_vm = vms->nr_accounted = 0;
-	vms->exec_vm = vms->stack_vm = vms->data_vm = 0;
-	vms->unmap_start = FIRST_USER_ADDRESS;
-	vms->unmap_end = USER_PGTABLES_CEILING;
-	vms->clear_ptes = false;
-}
-#endif
-
-int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
-		struct ma_state *mas_detach);
-
-void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
-		struct ma_state *mas_detach);
-
-void vms_clean_up_area(struct vma_munmap_struct *vms,
-		struct ma_state *mas_detach);
-
-/*
- * reattach_vmas() - Undo any munmap work and free resources
- * @mas_detach: The maple state with the detached maple tree
- *
- * Reattach any detached vmas and free up the maple tree used to track the vmas.
- */
-static inline void reattach_vmas(struct ma_state *mas_detach)
-{
-	struct vm_area_struct *vma;
-
-	mas_set(mas_detach, 0);
-	mas_for_each(mas_detach, vma, ULONG_MAX)
-		vma_mark_detached(vma, false);
-
-	__mt_destroy(mas_detach->tree);
-}
-
-/*
- * vms_abort_munmap_vmas() - Undo as much as possible from an aborted munmap()
- * operation.
- * @vms: The vma unmap structure
- * @mas_detach: The maple state with the detached maple tree
- *
- * Reattach any detached vmas, free up the maple tree used to track the vmas.
- * If that's not possible because the ptes are cleared (and vm_ops->closed() may
- * have been called), then a NULL is written over the vmas and the vmas are
- * removed (munmap() completed).
- */
-static inline void vms_abort_munmap_vmas(struct vma_munmap_struct *vms,
-		struct ma_state *mas_detach)
-{
-	struct ma_state *mas = &vms->vmi->mas;
-	if (!vms->nr_pages)
-		return;
-
-	if (vms->clear_ptes)
-		return reattach_vmas(mas_detach);
-
-	/*
-	 * Aborting cannot just call the vm_ops open() because they are often
-	 * not symmetrical and state data has been lost.  Resort to the old
-	 * failure method of leaving a gap where the MAP_FIXED mapping failed.
-	 */
-	mas_set_range(mas, vms->start, vms->end - 1);
-	mas_store_gfp(mas, NULL, GFP_KERNEL|__GFP_NOFAIL);
-	/* Clean up the insertion of the unfortunate gap */
-	vms_complete_munmap_vmas(vms, mas_detach);
-}
-
 int
 do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		    struct mm_struct *mm, unsigned long start,
@@ -336,6 +243,10 @@ bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
 int mm_take_all_locks(struct mm_struct *mm);
 void mm_drop_all_locks(struct mm_struct *mm);
 
+unsigned long __mmap_region(struct file *file, unsigned long addr,
+		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
+		struct list_head *uf);
+
 static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
 {
 	/*
@@ -17,8 +17,10 @@
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/huge_mm.h>
+#include <linux/hugetlb.h>
 #include <linux/hugetlb_inline.h>
 #include <linux/kernel.h>
+#include <linux/ksm.h>
 #include <linux/khugepaged.h>
 #include <linux/list.h>
 #include <linux/maple_tree.h>
@@ -32,11 +34,14 @@
 #include <linux/mmu_context.h>
 #include <linux/mutex.h>
 #include <linux/pagemap.h>
+#include <linux/perf_event.h>
 #include <linux/pfn.h>
 #include <linux/rcupdate.h>
 #include <linux/rmap.h>
 #include <linux/rwsem.h>
 #include <linux/sched/signal.h>
+#include <linux/security.h>
+#include <linux/shmem_fs.h>
 #include <linux/swap.h>
 #include <linux/uprobes.h>
 #include <linux/userfaultfd_k.h>