mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	mm: remove the now-unnecessary mmget_still_valid() hack
The preceding patches have ensured that core dumping properly takes the mmap_lock. Thanks to that, we can now remove mmget_still_valid() and all its users. Signed-off-by: Jann Horn <jannh@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: "Eric W . Biederman" <ebiederm@xmission.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Hugh Dickins <hughd@google.com> Link: http://lkml.kernel.org/r/20200827114932.3572699-8-jannh@google.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									7f3bfab52c
								
							
						
					
					
						commit
						4d45e75a99
					
				
					 8 changed files with 32 additions and 110 deletions
				
			
		| 
						 | 
				
			
			@ -845,8 +845,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 | 
			
		|||
		 * will only be one mm, so no big deal.
 | 
			
		||||
		 */
 | 
			
		||||
		mmap_read_lock(mm);
 | 
			
		||||
		if (!mmget_still_valid(mm))
 | 
			
		||||
			goto skip_mm;
 | 
			
		||||
		mutex_lock(&ufile->umap_lock);
 | 
			
		||||
		list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
 | 
			
		||||
					  list) {
 | 
			
		||||
| 
						 | 
				
			
			@ -865,7 +863,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 | 
			
		|||
			}
 | 
			
		||||
		}
 | 
			
		||||
		mutex_unlock(&ufile->umap_lock);
 | 
			
		||||
	skip_mm:
 | 
			
		||||
		mmap_read_unlock(mm);
 | 
			
		||||
		mmput(mm);
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1480,7 +1480,6 @@ static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
 | 
			
		|||
		} else {
 | 
			
		||||
			mmap_read_lock(mm);
 | 
			
		||||
		}
 | 
			
		||||
		if (mmget_still_valid(mm)) {
 | 
			
		||||
		if (try) {
 | 
			
		||||
			if (!mutex_trylock(&vdev->vma_lock)) {
 | 
			
		||||
				mmap_read_unlock(mm);
 | 
			
		||||
| 
						 | 
				
			
			@ -1504,7 +1503,6 @@ static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
 | 
			
		|||
				     vma->vm_end - vma->vm_start);
 | 
			
		||||
		}
 | 
			
		||||
		mutex_unlock(&vdev->vma_lock);
 | 
			
		||||
		}
 | 
			
		||||
		mmap_read_unlock(mm);
 | 
			
		||||
		mmput(mm);
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1244,24 +1244,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 | 
			
		|||
					count = -EINTR;
 | 
			
		||||
					goto out_mm;
 | 
			
		||||
				}
 | 
			
		||||
				/*
 | 
			
		||||
				 * Avoid to modify vma->vm_flags
 | 
			
		||||
				 * without locked ops while the
 | 
			
		||||
				 * coredump reads the vm_flags.
 | 
			
		||||
				 */
 | 
			
		||||
				if (!mmget_still_valid(mm)) {
 | 
			
		||||
					/*
 | 
			
		||||
					 * Silently return "count"
 | 
			
		||||
					 * like if get_task_mm()
 | 
			
		||||
					 * failed. FIXME: should this
 | 
			
		||||
					 * function have returned
 | 
			
		||||
					 * -ESRCH if get_task_mm()
 | 
			
		||||
					 * failed like if
 | 
			
		||||
					 * get_proc_task() fails?
 | 
			
		||||
					 */
 | 
			
		||||
					mmap_write_unlock(mm);
 | 
			
		||||
					goto out_mm;
 | 
			
		||||
				}
 | 
			
		||||
				for (vma = mm->mmap; vma; vma = vma->vm_next) {
 | 
			
		||||
					vma->vm_flags &= ~VM_SOFTDIRTY;
 | 
			
		||||
					vma_set_page_prot(vma);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -601,8 +601,6 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 | 
			
		|||
 | 
			
		||||
		/* the various vma->vm_userfaultfd_ctx still points to it */
 | 
			
		||||
		mmap_write_lock(mm);
 | 
			
		||||
		/* no task can run (and in turn coredump) yet */
 | 
			
		||||
		VM_WARN_ON(!mmget_still_valid(mm));
 | 
			
		||||
		for (vma = mm->mmap; vma; vma = vma->vm_next)
 | 
			
		||||
			if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
 | 
			
		||||
				vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
 | 
			
		||||
| 
						 | 
				
			
			@ -842,7 +840,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 | 
			
		|||
	/* len == 0 means wake all */
 | 
			
		||||
	struct userfaultfd_wake_range range = { .len = 0, };
 | 
			
		||||
	unsigned long new_flags;
 | 
			
		||||
	bool still_valid;
 | 
			
		||||
 | 
			
		||||
	WRITE_ONCE(ctx->released, true);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -858,7 +855,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 | 
			
		|||
	 * taking the mmap_lock for writing.
 | 
			
		||||
	 */
 | 
			
		||||
	mmap_write_lock(mm);
 | 
			
		||||
	still_valid = mmget_still_valid(mm);
 | 
			
		||||
	prev = NULL;
 | 
			
		||||
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 | 
			
		||||
		cond_resched();
 | 
			
		||||
| 
						 | 
				
			
			@ -869,7 +865,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 | 
			
		|||
			continue;
 | 
			
		||||
		}
 | 
			
		||||
		new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
 | 
			
		||||
		if (still_valid) {
 | 
			
		||||
		prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
 | 
			
		||||
				 new_flags, vma->anon_vma,
 | 
			
		||||
				 vma->vm_file, vma->vm_pgoff,
 | 
			
		||||
| 
						 | 
				
			
			@ -879,7 +874,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 | 
			
		|||
			vma = prev;
 | 
			
		||||
		else
 | 
			
		||||
			prev = vma;
 | 
			
		||||
		}
 | 
			
		||||
		vma->vm_flags = new_flags;
 | 
			
		||||
		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -1309,8 +1303,6 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 | 
			
		|||
		goto out;
 | 
			
		||||
 | 
			
		||||
	mmap_write_lock(mm);
 | 
			
		||||
	if (!mmget_still_valid(mm))
 | 
			
		||||
		goto out_unlock;
 | 
			
		||||
	vma = find_vma_prev(mm, start, &prev);
 | 
			
		||||
	if (!vma)
 | 
			
		||||
		goto out_unlock;
 | 
			
		||||
| 
						 | 
				
			
			@ -1511,8 +1503,6 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 | 
			
		|||
		goto out;
 | 
			
		||||
 | 
			
		||||
	mmap_write_lock(mm);
 | 
			
		||||
	if (!mmget_still_valid(mm))
 | 
			
		||||
		goto out_unlock;
 | 
			
		||||
	vma = find_vma_prev(mm, start, &prev);
 | 
			
		||||
	if (!vma)
 | 
			
		||||
		goto out_unlock;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -49,31 +49,6 @@ static inline void mmdrop(struct mm_struct *mm)
 | 
			
		|||
		__mmdrop(mm);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * This has to be called after a get_task_mm()/mmget_not_zero()
 | 
			
		||||
 * followed by taking the mmap_lock for writing before modifying the
 | 
			
		||||
 * vmas or anything the coredump pretends not to change from under it.
 | 
			
		||||
 *
 | 
			
		||||
 * It also has to be called when mmgrab() is used in the context of
 | 
			
		||||
 * the process, but then the mm_count refcount is transferred outside
 | 
			
		||||
 * the context of the process to run down_write() on that pinned mm.
 | 
			
		||||
 *
 | 
			
		||||
 * NOTE: find_extend_vma() called from GUP context is the only place
 | 
			
		||||
 * that can modify the "mm" (notably the vm_start/end) under mmap_lock
 | 
			
		||||
 * for reading and outside the context of the process, so it is also
 | 
			
		||||
 * the only case that holds the mmap_lock for reading that must call
 | 
			
		||||
 * this function. Generally if the mmap_lock is hold for reading
 | 
			
		||||
 * there's no need of this check after get_task_mm()/mmget_not_zero().
 | 
			
		||||
 *
 | 
			
		||||
 * This function can be obsoleted and the check can be removed, after
 | 
			
		||||
 * the coredump code will hold the mmap_lock for writing before
 | 
			
		||||
 * invoking the ->core_dump methods.
 | 
			
		||||
 */
 | 
			
		||||
static inline bool mmget_still_valid(struct mm_struct *mm)
 | 
			
		||||
{
 | 
			
		||||
	return likely(!mm->core_state);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * mmget() - Pin the address space associated with a &struct mm_struct.
 | 
			
		||||
 * @mm: The address space to pin.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -434,7 +434,7 @@ static void insert_to_mm_slots_hash(struct mm_struct *mm,
 | 
			
		|||
 | 
			
		||||
static inline int khugepaged_test_exit(struct mm_struct *mm)
 | 
			
		||||
{
 | 
			
		||||
	return atomic_read(&mm->mm_users) == 0 || !mmget_still_valid(mm);
 | 
			
		||||
	return atomic_read(&mm->mm_users) == 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool hugepage_vma_check(struct vm_area_struct *vma,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										17
									
								
								mm/madvise.c
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								mm/madvise.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -1085,23 +1085,6 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
 | 
			
		|||
	if (write) {
 | 
			
		||||
		if (mmap_write_lock_killable(current->mm))
 | 
			
		||||
			return -EINTR;
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * We may have stolen the mm from another process
 | 
			
		||||
		 * that is undergoing core dumping.
 | 
			
		||||
		 *
 | 
			
		||||
		 * Right now that's io_ring, in the future it may
 | 
			
		||||
		 * be remote process management and not "current"
 | 
			
		||||
		 * at all.
 | 
			
		||||
		 *
 | 
			
		||||
		 * We need to fix core dumping to not do this,
 | 
			
		||||
		 * but for now we have the mmget_still_valid()
 | 
			
		||||
		 * model.
 | 
			
		||||
		 */
 | 
			
		||||
		if (!mmget_still_valid(current->mm)) {
 | 
			
		||||
			mmap_write_unlock(current->mm);
 | 
			
		||||
			return -EINTR;
 | 
			
		||||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
		mmap_read_lock(current->mm);
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2562,7 +2562,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 | 
			
		|||
	if (vma && (vma->vm_start <= addr))
 | 
			
		||||
		return vma;
 | 
			
		||||
	/* don't alter vm_end if the coredump is running */
 | 
			
		||||
	if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr))
 | 
			
		||||
	if (!prev || expand_stack(prev, addr))
 | 
			
		||||
		return NULL;
 | 
			
		||||
	if (prev->vm_flags & VM_LOCKED)
 | 
			
		||||
		populate_vma_page_range(prev, addr, prev->vm_end, NULL);
 | 
			
		||||
| 
						 | 
				
			
			@ -2588,9 +2588,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 | 
			
		|||
		return vma;
 | 
			
		||||
	if (!(vma->vm_flags & VM_GROWSDOWN))
 | 
			
		||||
		return NULL;
 | 
			
		||||
	/* don't alter vm_start if the coredump is running */
 | 
			
		||||
	if (!mmget_still_valid(mm))
 | 
			
		||||
		return NULL;
 | 
			
		||||
	start = vma->vm_start;
 | 
			
		||||
	if (expand_stack(vma, addr))
 | 
			
		||||
		return NULL;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue