mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	tmpfs: fix shmem_swaplist races
Intensive swapoff testing shows shmem_unuse spinning on an entry in shmem_swaplist pointing to itself: how does that come about? Days pass... First guess is this: shmem_delete_inode tests list_empty without taking the global mutex (so the swapping case doesn't slow down the common case); but there's an instant in shmem_unuse_inode's list_move_tail when the list entry may appear empty (a rare case, because it's actually moving the head not the list member). So there's a danger of leaving the inode on the swaplist when it's freed, then reinitialized to point to itself when reused. Fix that by skipping the list_move_tail when it's a no-op, which happens to plug this. But this same spinning then surfaces on another machine. Ah, I'd never suspected it, but shmem_writepage's swaplist manipulation is unsafe: though we still hold page lock, which would hold off inode deletion if the page were in pagecache, it doesn't hold off once it's in swapcache (free_swap_and_cache doesn't wait on locked pages). Hmm: we could put the inode on swaplist earlier, but then shmem_unuse_inode could never prune unswapped inodes. Fix this with an igrab before dropping info->lock, as in shmem_unuse_inode; though I am a little uneasy about the iput which has to follow - it works, and I see nothing wrong with it, but it is surprising that shmem inode deletion may now occur below shmem_writepage. Revisit this fix later? And while we're looking at these races: the way shmem_unuse tests swapped without holding info->lock looks unsafe, if we've more than one swap area: a racing shmem_writepage on another page of the same inode could be putting it in swapcache, just as we're deciding to remove the inode from swaplist - there's a danger of going on swap without being listed, so a later swapoff would hang, being unable to locate the entry. Move that test and removal down into shmem_unuse_inode, once info->lock is held. 
Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									b409f9fcf0
								
							
						
					
					
						commit
						1b1b32f2c6
					
				
					 1 changed files with 25 additions and 12 deletions
				
			
		
							
								
								
									
										33
									
								
								mm/shmem.c
									
									
									
									
									
								
							
							
						
						
									
										33
									
								
								mm/shmem.c
									
									
									
									
									
								
							| 
						 | 
					@ -833,6 +833,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 | 
				
			||||||
	idx = 0;
 | 
						idx = 0;
 | 
				
			||||||
	ptr = info->i_direct;
 | 
						ptr = info->i_direct;
 | 
				
			||||||
	spin_lock(&info->lock);
 | 
						spin_lock(&info->lock);
 | 
				
			||||||
 | 
						if (!info->swapped) {
 | 
				
			||||||
 | 
							list_del_init(&info->swaplist);
 | 
				
			||||||
 | 
							goto lost2;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	limit = info->next_index;
 | 
						limit = info->next_index;
 | 
				
			||||||
	size = limit;
 | 
						size = limit;
 | 
				
			||||||
	if (size > SHMEM_NR_DIRECT)
 | 
						if (size > SHMEM_NR_DIRECT)
 | 
				
			||||||
| 
						 | 
					@ -894,7 +898,14 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 | 
				
			||||||
	inode = igrab(&info->vfs_inode);
 | 
						inode = igrab(&info->vfs_inode);
 | 
				
			||||||
	spin_unlock(&info->lock);
 | 
						spin_unlock(&info->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* move head to start search for next from here */
 | 
						/*
 | 
				
			||||||
 | 
						 * Move _head_ to start search for next from here.
 | 
				
			||||||
 | 
						 * But be careful: shmem_delete_inode checks list_empty without taking
 | 
				
			||||||
 | 
						 * mutex, and there's an instant in list_move_tail when info->swaplist
 | 
				
			||||||
 | 
						 * would appear empty, if it were the only one on shmem_swaplist.  We
 | 
				
			||||||
 | 
						 * could avoid doing it if inode NULL; or use this minor optimization.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (shmem_swaplist.next != &info->swaplist)
 | 
				
			||||||
		list_move_tail(&shmem_swaplist, &info->swaplist);
 | 
							list_move_tail(&shmem_swaplist, &info->swaplist);
 | 
				
			||||||
	mutex_unlock(&shmem_swaplist_mutex);
 | 
						mutex_unlock(&shmem_swaplist_mutex);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -955,10 +966,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 | 
				
			||||||
	mutex_lock(&shmem_swaplist_mutex);
 | 
						mutex_lock(&shmem_swaplist_mutex);
 | 
				
			||||||
	list_for_each_safe(p, next, &shmem_swaplist) {
 | 
						list_for_each_safe(p, next, &shmem_swaplist) {
 | 
				
			||||||
		info = list_entry(p, struct shmem_inode_info, swaplist);
 | 
							info = list_entry(p, struct shmem_inode_info, swaplist);
 | 
				
			||||||
		if (info->swapped)
 | 
					 | 
				
			||||||
		found = shmem_unuse_inode(info, entry, page);
 | 
							found = shmem_unuse_inode(info, entry, page);
 | 
				
			||||||
		else
 | 
					 | 
				
			||||||
			list_del_init(&info->swaplist);
 | 
					 | 
				
			||||||
		cond_resched();
 | 
							cond_resched();
 | 
				
			||||||
		if (found)
 | 
							if (found)
 | 
				
			||||||
			goto out;
 | 
								goto out;
 | 
				
			||||||
| 
						 | 
					@ -1021,18 +1029,23 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 | 
				
			||||||
		remove_from_page_cache(page);
 | 
							remove_from_page_cache(page);
 | 
				
			||||||
		shmem_swp_set(info, entry, swap.val);
 | 
							shmem_swp_set(info, entry, swap.val);
 | 
				
			||||||
		shmem_swp_unmap(entry);
 | 
							shmem_swp_unmap(entry);
 | 
				
			||||||
 | 
							if (list_empty(&info->swaplist))
 | 
				
			||||||
 | 
								inode = igrab(inode);
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
 | 
								inode = NULL;
 | 
				
			||||||
		spin_unlock(&info->lock);
 | 
							spin_unlock(&info->lock);
 | 
				
			||||||
		if (list_empty(&info->swaplist)) {
 | 
					 | 
				
			||||||
			mutex_lock(&shmem_swaplist_mutex);
 | 
					 | 
				
			||||||
			/* move instead of add in case we're racing */
 | 
					 | 
				
			||||||
			list_move_tail(&info->swaplist, &shmem_swaplist);
 | 
					 | 
				
			||||||
			mutex_unlock(&shmem_swaplist_mutex);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		swap_duplicate(swap);
 | 
							swap_duplicate(swap);
 | 
				
			||||||
		BUG_ON(page_mapped(page));
 | 
							BUG_ON(page_mapped(page));
 | 
				
			||||||
		page_cache_release(page);	/* pagecache ref */
 | 
							page_cache_release(page);	/* pagecache ref */
 | 
				
			||||||
		set_page_dirty(page);
 | 
							set_page_dirty(page);
 | 
				
			||||||
		unlock_page(page);
 | 
							unlock_page(page);
 | 
				
			||||||
 | 
							if (inode) {
 | 
				
			||||||
 | 
								mutex_lock(&shmem_swaplist_mutex);
 | 
				
			||||||
 | 
								/* move instead of add in case we're racing */
 | 
				
			||||||
 | 
								list_move_tail(&info->swaplist, &shmem_swaplist);
 | 
				
			||||||
 | 
								mutex_unlock(&shmem_swaplist_mutex);
 | 
				
			||||||
 | 
								iput(inode);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue