mm: remove the now-unnecessary mmget_still_valid() hack

The preceding patches have ensured that core dumping properly takes the
mmap_lock. Thanks to that, we can now remove mmget_still_valid() and all
its users.

Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Link: http://lkml.kernel.org/r/20200827114932.3572699-8-jannh@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 7f3bfab52c
commit 4d45e75a99

8 changed files with 32 additions and 110 deletions
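To make the before/after pattern concrete, here is a minimal sketch; it is not code from the patch itself. The wrapper functions and their -EINTR return value are invented for illustration (loosely following the do_madvise() hunk below), while mmap_write_lock(), mmap_write_unlock() and mmget_still_valid() are the real kernel APIs shown in these diffs:

/*
 * Before this series: after taking mmap_lock for writing, callers had to
 * re-check that no core dump had started, because the coredump walked the
 * vmas without holding the lock. mmget_still_valid() was just a check of
 * mm->core_state (see the include/linux/sched/mm.h hunk below).
 */
static int modify_vmas_old(struct mm_struct *mm)
{
	mmap_write_lock(mm);
	if (!mmget_still_valid(mm)) {	/* a coredump is in progress */
		mmap_write_unlock(mm);
		return -EINTR;		/* bail out, touch nothing */
	}
	/* ... safe to modify vmas here ... */
	mmap_write_unlock(mm);
	return 0;
}

/*
 * After this series: the coredump itself takes mmap_lock, so holding the
 * lock for writing is sufficient on its own.
 */
static int modify_vmas_new(struct mm_struct *mm)
{
	mmap_write_lock(mm);
	/* ... safe to modify vmas here ... */
	mmap_write_unlock(mm);
	return 0;
}

The hunks below are exactly this transformation applied at each call site: drop the mmget_still_valid() check and the error path behind it, keeping the locking that was already there.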
drivers/infiniband/core/uverbs_main.c

@@ -845,8 +845,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 	 * will only be one mm, so no big deal.
 	 */
 	mmap_read_lock(mm);
-	if (!mmget_still_valid(mm))
-		goto skip_mm;
 	mutex_lock(&ufile->umap_lock);
 	list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
 				  list) {
@@ -865,7 +863,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 		}
 	}
 	mutex_unlock(&ufile->umap_lock);
-skip_mm:
 	mmap_read_unlock(mm);
 	mmput(mm);
 }
drivers/vfio/pci/vfio_pci.c

@@ -1480,31 +1480,29 @@ static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
 	} else {
 		mmap_read_lock(mm);
 	}
-	if (mmget_still_valid(mm)) {
-		if (try) {
-			if (!mutex_trylock(&vdev->vma_lock)) {
-				mmap_read_unlock(mm);
-				mmput(mm);
-				return 0;
-			}
-		} else {
-			mutex_lock(&vdev->vma_lock);
-		}
-		list_for_each_entry_safe(mmap_vma, tmp,
-					 &vdev->vma_list, vma_next) {
-			struct vm_area_struct *vma = mmap_vma->vma;
-
-			if (vma->vm_mm != mm)
-				continue;
-
-			list_del(&mmap_vma->vma_next);
-			kfree(mmap_vma);
-
-			zap_vma_ptes(vma, vma->vm_start,
-				     vma->vm_end - vma->vm_start);
-		}
-		mutex_unlock(&vdev->vma_lock);
-	}
+	if (try) {
+		if (!mutex_trylock(&vdev->vma_lock)) {
+			mmap_read_unlock(mm);
+			mmput(mm);
+			return 0;
+		}
+	} else {
+		mutex_lock(&vdev->vma_lock);
+	}
+	list_for_each_entry_safe(mmap_vma, tmp,
+				 &vdev->vma_list, vma_next) {
+		struct vm_area_struct *vma = mmap_vma->vma;
+
+		if (vma->vm_mm != mm)
+			continue;
+
+		list_del(&mmap_vma->vma_next);
+		kfree(mmap_vma);
+
+		zap_vma_ptes(vma, vma->vm_start,
+			     vma->vm_end - vma->vm_start);
+	}
+	mutex_unlock(&vdev->vma_lock);
 	mmap_read_unlock(mm);
 	mmput(mm);
 }
fs/proc/task_mmu.c

@@ -1244,24 +1244,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 				count = -EINTR;
 				goto out_mm;
 			}
-			/*
-			 * Avoid to modify vma->vm_flags
-			 * without locked ops while the
-			 * coredump reads the vm_flags.
-			 */
-			if (!mmget_still_valid(mm)) {
-				/*
-				 * Silently return "count"
-				 * like if get_task_mm()
-				 * failed. FIXME: should this
-				 * function have returned
-				 * -ESRCH if get_task_mm()
-				 * failed like if
-				 * get_proc_task() fails?
-				 */
-				mmap_write_unlock(mm);
-				goto out_mm;
-			}
 			for (vma = mm->mmap; vma; vma = vma->vm_next) {
 				vma->vm_flags &= ~VM_SOFTDIRTY;
 				vma_set_page_prot(vma);
fs/userfaultfd.c

@@ -601,8 +601,6 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 
 		/* the various vma->vm_userfaultfd_ctx still points to it */
 		mmap_write_lock(mm);
-		/* no task can run (and in turn coredump) yet */
-		VM_WARN_ON(!mmget_still_valid(mm));
 		for (vma = mm->mmap; vma; vma = vma->vm_next)
 			if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
 				vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
@@ -842,7 +840,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 	/* len == 0 means wake all */
 	struct userfaultfd_wake_range range = { .len = 0, };
 	unsigned long new_flags;
-	bool still_valid;
 
 	WRITE_ONCE(ctx->released, true);
 
@@ -858,7 +855,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 	 * taking the mmap_lock for writing.
 	 */
 	mmap_write_lock(mm);
-	still_valid = mmget_still_valid(mm);
 	prev = NULL;
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		cond_resched();
@@ -869,17 +865,15 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 			continue;
 		}
 		new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
-		if (still_valid) {
-			prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
-					 new_flags, vma->anon_vma,
-					 vma->vm_file, vma->vm_pgoff,
-					 vma_policy(vma),
-					 NULL_VM_UFFD_CTX);
-			if (prev)
-				vma = prev;
-			else
-				prev = vma;
-		}
+		prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
+				 new_flags, vma->anon_vma,
+				 vma->vm_file, vma->vm_pgoff,
+				 vma_policy(vma),
+				 NULL_VM_UFFD_CTX);
+		if (prev)
+			vma = prev;
+		else
+			prev = vma;
 		vma->vm_flags = new_flags;
 		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
 	}
@@ -1309,8 +1303,6 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		goto out;
 
 	mmap_write_lock(mm);
-	if (!mmget_still_valid(mm))
-		goto out_unlock;
 	vma = find_vma_prev(mm, start, &prev);
 	if (!vma)
 		goto out_unlock;
@@ -1511,8 +1503,6 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 		goto out;
 
 	mmap_write_lock(mm);
-	if (!mmget_still_valid(mm))
-		goto out_unlock;
 	vma = find_vma_prev(mm, start, &prev);
 	if (!vma)
 		goto out_unlock;
include/linux/sched/mm.h

@@ -49,31 +49,6 @@ static inline void mmdrop(struct mm_struct *mm)
 		__mmdrop(mm);
 }
 
-/*
- * This has to be called after a get_task_mm()/mmget_not_zero()
- * followed by taking the mmap_lock for writing before modifying the
- * vmas or anything the coredump pretends not to change from under it.
- *
- * It also has to be called when mmgrab() is used in the context of
- * the process, but then the mm_count refcount is transferred outside
- * the context of the process to run down_write() on that pinned mm.
- *
- * NOTE: find_extend_vma() called from GUP context is the only place
- * that can modify the "mm" (notably the vm_start/end) under mmap_lock
- * for reading and outside the context of the process, so it is also
- * the only case that holds the mmap_lock for reading that must call
- * this function. Generally if the mmap_lock is hold for reading
- * there's no need of this check after get_task_mm()/mmget_not_zero().
- *
- * This function can be obsoleted and the check can be removed, after
- * the coredump code will hold the mmap_lock for writing before
- * invoking the ->core_dump methods.
- */
-static inline bool mmget_still_valid(struct mm_struct *mm)
-{
-	return likely(!mm->core_state);
-}
-
 /**
  * mmget() - Pin the address space associated with a &struct mm_struct.
  * @mm: The address space to pin.
mm/khugepaged.c

@@ -434,7 +434,7 @@ static void insert_to_mm_slots_hash(struct mm_struct *mm,
 
 static inline int khugepaged_test_exit(struct mm_struct *mm)
 {
-	return atomic_read(&mm->mm_users) == 0 || !mmget_still_valid(mm);
+	return atomic_read(&mm->mm_users) == 0;
 }
 
 static bool hugepage_vma_check(struct vm_area_struct *vma,
mm/madvise.c

@@ -1085,23 +1085,6 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
 	if (write) {
 		if (mmap_write_lock_killable(current->mm))
 			return -EINTR;
-
-		/*
-		 * We may have stolen the mm from another process
-		 * that is undergoing core dumping.
-		 *
-		 * Right now that's io_ring, in the future it may
-		 * be remote process management and not "current"
-		 * at all.
-		 *
-		 * We need to fix core dumping to not do this,
-		 * but for now we have the mmget_still_valid()
-		 * model.
-		 */
-		if (!mmget_still_valid(current->mm)) {
-			mmap_write_unlock(current->mm);
-			return -EINTR;
-		}
 	} else {
 		mmap_read_lock(current->mm);
 	}
mm/mmap.c

@@ -2562,7 +2562,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 	if (vma && (vma->vm_start <= addr))
 		return vma;
 	/* don't alter vm_end if the coredump is running */
-	if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr))
+	if (!prev || expand_stack(prev, addr))
 		return NULL;
 	if (prev->vm_flags & VM_LOCKED)
 		populate_vma_page_range(prev, addr, prev->vm_end, NULL);
@@ -2588,9 +2588,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 		return vma;
 	if (!(vma->vm_flags & VM_GROWSDOWN))
 		return NULL;
-	/* don't alter vm_start if the coredump is running */
-	if (!mmget_still_valid(mm))
-		return NULL;
 	start = vma->vm_start;
 	if (expand_stack(vma, addr))
 		return NULL;