forked from mirrors/linux
		
	tmpfs: fix race between umount and swapoff
The use of igrab() in swapoff's shmem_unuse_inode() is just as vulnerable to umount as that in shmem_writepage(). Fix this instance by extending the protection of shmem_swaplist_mutex right across shmem_unuse_inode(): while it's on the list, the inode cannot be evicted (and the filesystem cannot be unmounted) without shmem_evict_inode() taking that mutex to remove it from the list. But since shmem_writepage() might take that mutex, we should avoid making memory allocations or memcg charges while holding it: prepare them at the outer level in shmem_unuse(). When mem_cgroup_cache_charge() was originally placed, we didn't know until that point that the page from swap was actually a shmem page; but nowadays it's noted in the swap_map, so we're safe to charge upfront. For the radix_tree, do as is done in shmem_getpage(): preload upfront, but don't pin to the cpu; so we make a habit of refreshing the node pool, but might dip into GFP_NOWAIT reserves on occasion if subsequently preempted. With the allocation and charge moved out from shmem_unuse_inode(), we can also hold index map and info->lock over from finding the entry. Signed-off-by: Hugh Dickins <hughd@google.com> Cc: Konstantin Khlebnikov <khlebnikov@openvz.org> Cc: <stable@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									b1dea800ac
								
							
						
					
					
						commit
						778dd893ae
					
				
					 1 changed files with 43 additions and 45 deletions
				
			
		
							
								
								
									
										88
									
								
								mm/shmem.c
									
									
									
									
									
								
							
							
						
						
									
										88
									
								
								mm/shmem.c
									
									
									
									
									
								
							| 
						 | 
					@ -852,7 +852,7 @@ static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
 | 
					static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct inode *inode;
 | 
						struct address_space *mapping;
 | 
				
			||||||
	unsigned long idx;
 | 
						unsigned long idx;
 | 
				
			||||||
	unsigned long size;
 | 
						unsigned long size;
 | 
				
			||||||
	unsigned long limit;
 | 
						unsigned long limit;
 | 
				
			||||||
| 
						 | 
					@ -875,8 +875,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 | 
				
			||||||
	if (size > SHMEM_NR_DIRECT)
 | 
						if (size > SHMEM_NR_DIRECT)
 | 
				
			||||||
		size = SHMEM_NR_DIRECT;
 | 
							size = SHMEM_NR_DIRECT;
 | 
				
			||||||
	offset = shmem_find_swp(entry, ptr, ptr+size);
 | 
						offset = shmem_find_swp(entry, ptr, ptr+size);
 | 
				
			||||||
	if (offset >= 0)
 | 
						if (offset >= 0) {
 | 
				
			||||||
 | 
							shmem_swp_balance_unmap();
 | 
				
			||||||
		goto found;
 | 
							goto found;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	if (!info->i_indirect)
 | 
						if (!info->i_indirect)
 | 
				
			||||||
		goto lost2;
 | 
							goto lost2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -914,11 +916,11 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 | 
				
			||||||
			if (size > ENTRIES_PER_PAGE)
 | 
								if (size > ENTRIES_PER_PAGE)
 | 
				
			||||||
				size = ENTRIES_PER_PAGE;
 | 
									size = ENTRIES_PER_PAGE;
 | 
				
			||||||
			offset = shmem_find_swp(entry, ptr, ptr+size);
 | 
								offset = shmem_find_swp(entry, ptr, ptr+size);
 | 
				
			||||||
			shmem_swp_unmap(ptr);
 | 
					 | 
				
			||||||
			if (offset >= 0) {
 | 
								if (offset >= 0) {
 | 
				
			||||||
				shmem_dir_unmap(dir);
 | 
									shmem_dir_unmap(dir);
 | 
				
			||||||
				goto found;
 | 
									goto found;
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
								shmem_swp_unmap(ptr);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
lost1:
 | 
					lost1:
 | 
				
			||||||
| 
						 | 
					@ -928,8 +930,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
found:
 | 
					found:
 | 
				
			||||||
	idx += offset;
 | 
						idx += offset;
 | 
				
			||||||
	inode = igrab(&info->vfs_inode);
 | 
						ptr += offset;
 | 
				
			||||||
	spin_unlock(&info->lock);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Move _head_ to start search for next from here.
 | 
						 * Move _head_ to start search for next from here.
 | 
				
			||||||
| 
						 | 
					@ -940,37 +941,18 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (shmem_swaplist.next != &info->swaplist)
 | 
						if (shmem_swaplist.next != &info->swaplist)
 | 
				
			||||||
		list_move_tail(&shmem_swaplist, &info->swaplist);
 | 
							list_move_tail(&shmem_swaplist, &info->swaplist);
 | 
				
			||||||
	mutex_unlock(&shmem_swaplist_mutex);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	error = 1;
 | 
					 | 
				
			||||||
	if (!inode)
 | 
					 | 
				
			||||||
		goto out;
 | 
					 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Charge page using GFP_KERNEL while we can wait.
 | 
						 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
 | 
				
			||||||
	 * Charged back to the user(not to caller) when swap account is used.
 | 
						 * but also to hold up shmem_evict_inode(): so inode cannot be freed
 | 
				
			||||||
	 * add_to_page_cache() will be called with GFP_NOWAIT.
 | 
						 * beneath us (pagelock doesn't help until the page is in pagecache).
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
 | 
						mapping = info->vfs_inode.i_mapping;
 | 
				
			||||||
	if (error)
 | 
						error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
 | 
				
			||||||
		goto out;
 | 
						/* which does mem_cgroup_uncharge_cache_page on error */
 | 
				
			||||||
	error = radix_tree_preload(GFP_KERNEL);
 | 
					 | 
				
			||||||
	if (error) {
 | 
					 | 
				
			||||||
		mem_cgroup_uncharge_cache_page(page);
 | 
					 | 
				
			||||||
		goto out;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	error = 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	spin_lock(&info->lock);
 | 
					 | 
				
			||||||
	ptr = shmem_swp_entry(info, idx, NULL);
 | 
					 | 
				
			||||||
	if (ptr && ptr->val == entry.val) {
 | 
					 | 
				
			||||||
		error = add_to_page_cache_locked(page, inode->i_mapping,
 | 
					 | 
				
			||||||
						idx, GFP_NOWAIT);
 | 
					 | 
				
			||||||
		/* does mem_cgroup_uncharge_cache_page on error */
 | 
					 | 
				
			||||||
	} else	/* we must compensate for our precharge above */
 | 
					 | 
				
			||||||
		mem_cgroup_uncharge_cache_page(page);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (error == -EEXIST) {
 | 
						if (error == -EEXIST) {
 | 
				
			||||||
		struct page *filepage = find_get_page(inode->i_mapping, idx);
 | 
							struct page *filepage = find_get_page(mapping, idx);
 | 
				
			||||||
		error = 1;
 | 
							error = 1;
 | 
				
			||||||
		if (filepage) {
 | 
							if (filepage) {
 | 
				
			||||||
			/*
 | 
								/*
 | 
				
			||||||
| 
						 | 
					@ -990,14 +972,8 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 | 
				
			||||||
		swap_free(entry);
 | 
							swap_free(entry);
 | 
				
			||||||
		error = 1;	/* not an error, but entry was found */
 | 
							error = 1;	/* not an error, but entry was found */
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if (ptr)
 | 
						shmem_swp_unmap(ptr);
 | 
				
			||||||
		shmem_swp_unmap(ptr);
 | 
					 | 
				
			||||||
	spin_unlock(&info->lock);
 | 
						spin_unlock(&info->lock);
 | 
				
			||||||
	radix_tree_preload_end();
 | 
					 | 
				
			||||||
out:
 | 
					 | 
				
			||||||
	unlock_page(page);
 | 
					 | 
				
			||||||
	page_cache_release(page);
 | 
					 | 
				
			||||||
	iput(inode);		/* allows for NULL */
 | 
					 | 
				
			||||||
	return error;
 | 
						return error;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1009,6 +985,26 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 | 
				
			||||||
	struct list_head *p, *next;
 | 
						struct list_head *p, *next;
 | 
				
			||||||
	struct shmem_inode_info *info;
 | 
						struct shmem_inode_info *info;
 | 
				
			||||||
	int found = 0;
 | 
						int found = 0;
 | 
				
			||||||
 | 
						int error;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Charge page using GFP_KERNEL while we can wait, before taking
 | 
				
			||||||
 | 
						 * the shmem_swaplist_mutex which might hold up shmem_writepage().
 | 
				
			||||||
 | 
						 * Charged back to the user (not to caller) when swap account is used.
 | 
				
			||||||
 | 
						 * add_to_page_cache() will be called with GFP_NOWAIT.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
 | 
				
			||||||
 | 
						if (error)
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Try to preload while we can wait, to not make a habit of
 | 
				
			||||||
 | 
						 * draining atomic reserves; but don't latch on to this cpu,
 | 
				
			||||||
 | 
						 * it's okay if sometimes we get rescheduled after this.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						error = radix_tree_preload(GFP_KERNEL);
 | 
				
			||||||
 | 
						if (error)
 | 
				
			||||||
 | 
							goto uncharge;
 | 
				
			||||||
 | 
						radix_tree_preload_end();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mutex_lock(&shmem_swaplist_mutex);
 | 
						mutex_lock(&shmem_swaplist_mutex);
 | 
				
			||||||
	list_for_each_safe(p, next, &shmem_swaplist) {
 | 
						list_for_each_safe(p, next, &shmem_swaplist) {
 | 
				
			||||||
| 
						 | 
					@ -1016,17 +1012,19 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 | 
				
			||||||
		found = shmem_unuse_inode(info, entry, page);
 | 
							found = shmem_unuse_inode(info, entry, page);
 | 
				
			||||||
		cond_resched();
 | 
							cond_resched();
 | 
				
			||||||
		if (found)
 | 
							if (found)
 | 
				
			||||||
			goto out;
 | 
								break;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	mutex_unlock(&shmem_swaplist_mutex);
 | 
						mutex_unlock(&shmem_swaplist_mutex);
 | 
				
			||||||
	/*
 | 
					
 | 
				
			||||||
	 * Can some race bring us here?  We've been holding page lock,
 | 
					uncharge:
 | 
				
			||||||
	 * so I think not; but would rather try again later than BUG()
 | 
						if (!found)
 | 
				
			||||||
	 */
 | 
							mem_cgroup_uncharge_cache_page(page);
 | 
				
			||||||
 | 
						if (found < 0)
 | 
				
			||||||
 | 
							error = found;
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
	unlock_page(page);
 | 
						unlock_page(page);
 | 
				
			||||||
	page_cache_release(page);
 | 
						page_cache_release(page);
 | 
				
			||||||
out:
 | 
						return error;
 | 
				
			||||||
	return (found < 0) ? found : 0;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue