mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	ext4: fix deadlock in journal_unmap_buffer()
We cannot wait for transaction commit in journal_unmap_buffer() because we hold page lock which ranks below transaction start. We solve the issue by bailing out of journal_unmap_buffer() and jbd2_journal_invalidatepage() with -EBUSY. Caller is then responsible for waiting for transaction commit to finish and try invalidation again. Since the issue can happen only for page stradding i_size, it is simple enough to manually call jbd2_journal_invalidatepage() for such page from ext4_setattr(), check the return value and wait if necessary. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
		
							parent
							
								
									4520fb3c36
								
							
						
					
					
						commit
						53e872681f
					
				
					 3 changed files with 86 additions and 25 deletions
				
			
		| 
						 | 
				
			
			@ -2894,8 +2894,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
 | 
			
		|||
	block_invalidatepage(page, offset);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void ext4_journalled_invalidatepage(struct page *page,
 | 
			
		||||
					   unsigned long offset)
 | 
			
		||||
static int __ext4_journalled_invalidatepage(struct page *page,
 | 
			
		||||
					    unsigned long offset)
 | 
			
		||||
{
 | 
			
		||||
	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -2907,7 +2907,14 @@ static void ext4_journalled_invalidatepage(struct page *page,
 | 
			
		|||
	if (offset == 0)
 | 
			
		||||
		ClearPageChecked(page);
 | 
			
		||||
 | 
			
		||||
	jbd2_journal_invalidatepage(journal, page, offset);
 | 
			
		||||
	return jbd2_journal_invalidatepage(journal, page, offset);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Wrapper for aops... */
 | 
			
		||||
static void ext4_journalled_invalidatepage(struct page *page,
 | 
			
		||||
					   unsigned long offset)
 | 
			
		||||
{
 | 
			
		||||
	WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int ext4_releasepage(struct page *page, gfp_t wait)
 | 
			
		||||
| 
						 | 
				
			
			@ -4313,6 +4320,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
 | 
			
		|||
	return err;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
 | 
			
		||||
 * buffers that are attached to a page stradding i_size and are undergoing
 | 
			
		||||
 * commit. In that case we have to wait for commit to finish and try again.
 | 
			
		||||
 */
 | 
			
		||||
static void ext4_wait_for_tail_page_commit(struct inode *inode)
 | 
			
		||||
{
 | 
			
		||||
	struct page *page;
 | 
			
		||||
	unsigned offset;
 | 
			
		||||
	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
 | 
			
		||||
	tid_t commit_tid = 0;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
 | 
			
		||||
	/*
 | 
			
		||||
	 * All buffers in the last page remain valid? Then there's nothing to
 | 
			
		||||
	 * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
 | 
			
		||||
	 * blocksize case
 | 
			
		||||
	 */
 | 
			
		||||
	if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
 | 
			
		||||
		return;
 | 
			
		||||
	while (1) {
 | 
			
		||||
		page = find_lock_page(inode->i_mapping,
 | 
			
		||||
				      inode->i_size >> PAGE_CACHE_SHIFT);
 | 
			
		||||
		if (!page)
 | 
			
		||||
			return;
 | 
			
		||||
		ret = __ext4_journalled_invalidatepage(page, offset);
 | 
			
		||||
		unlock_page(page);
 | 
			
		||||
		page_cache_release(page);
 | 
			
		||||
		if (ret != -EBUSY)
 | 
			
		||||
			return;
 | 
			
		||||
		commit_tid = 0;
 | 
			
		||||
		read_lock(&journal->j_state_lock);
 | 
			
		||||
		if (journal->j_committing_transaction)
 | 
			
		||||
			commit_tid = journal->j_committing_transaction->t_tid;
 | 
			
		||||
		read_unlock(&journal->j_state_lock);
 | 
			
		||||
		if (commit_tid)
 | 
			
		||||
			jbd2_log_wait_commit(journal, commit_tid);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * ext4_setattr()
 | 
			
		||||
 *
 | 
			
		||||
| 
						 | 
				
			
			@ -4426,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 | 
			
		|||
	}
 | 
			
		||||
 | 
			
		||||
	if (attr->ia_valid & ATTR_SIZE) {
 | 
			
		||||
		if (attr->ia_size != i_size_read(inode)) {
 | 
			
		||||
			truncate_setsize(inode, attr->ia_size);
 | 
			
		||||
			/* Inode size will be reduced, wait for dio in flight.
 | 
			
		||||
			 * Temporarily disable dioread_nolock to prevent
 | 
			
		||||
			 * livelock. */
 | 
			
		||||
		if (attr->ia_size != inode->i_size) {
 | 
			
		||||
			loff_t oldsize = inode->i_size;
 | 
			
		||||
 | 
			
		||||
			i_size_write(inode, attr->ia_size);
 | 
			
		||||
			/*
 | 
			
		||||
			 * Blocks are going to be removed from the inode. Wait
 | 
			
		||||
			 * for dio in flight.  Temporarily disable
 | 
			
		||||
			 * dioread_nolock to prevent livelock.
 | 
			
		||||
			 */
 | 
			
		||||
			if (orphan) {
 | 
			
		||||
				ext4_inode_block_unlocked_dio(inode);
 | 
			
		||||
				inode_dio_wait(inode);
 | 
			
		||||
				ext4_inode_resume_unlocked_dio(inode);
 | 
			
		||||
				if (!ext4_should_journal_data(inode)) {
 | 
			
		||||
					ext4_inode_block_unlocked_dio(inode);
 | 
			
		||||
					inode_dio_wait(inode);
 | 
			
		||||
					ext4_inode_resume_unlocked_dio(inode);
 | 
			
		||||
				} else
 | 
			
		||||
					ext4_wait_for_tail_page_commit(inode);
 | 
			
		||||
			}
 | 
			
		||||
			/*
 | 
			
		||||
			 * Truncate pagecache after we've waited for commit
 | 
			
		||||
			 * in data=journal mode to make pages freeable.
 | 
			
		||||
			 */
 | 
			
		||||
			truncate_pagecache(inode, oldsize, inode->i_size);
 | 
			
		||||
		}
 | 
			
		||||
		ext4_truncate(inode);
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1840,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
 | 
			
		|||
 | 
			
		||||
	BUFFER_TRACE(bh, "entry");
 | 
			
		||||
 | 
			
		||||
retry:
 | 
			
		||||
	/*
 | 
			
		||||
	 * It is safe to proceed here without the j_list_lock because the
 | 
			
		||||
	 * buffers cannot be stolen by try_to_free_buffers as long as we are
 | 
			
		||||
| 
						 | 
				
			
			@ -1935,14 +1934,11 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
 | 
			
		|||
		 * for commit and try again.
 | 
			
		||||
		 */
 | 
			
		||||
		if (partial_page) {
 | 
			
		||||
			tid_t tid = journal->j_committing_transaction->t_tid;
 | 
			
		||||
 | 
			
		||||
			jbd2_journal_put_journal_head(jh);
 | 
			
		||||
			spin_unlock(&journal->j_list_lock);
 | 
			
		||||
			jbd_unlock_bh_state(bh);
 | 
			
		||||
			write_unlock(&journal->j_state_lock);
 | 
			
		||||
			jbd2_log_wait_commit(journal, tid);
 | 
			
		||||
			goto retry;
 | 
			
		||||
			return -EBUSY;
 | 
			
		||||
		}
 | 
			
		||||
		/*
 | 
			
		||||
		 * OK, buffer won't be reachable after truncate. We just set
 | 
			
		||||
| 
						 | 
				
			
			@ -2003,21 +1999,23 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
 | 
			
		|||
 * @page:    page to flush
 | 
			
		||||
 * @offset:  length of page to invalidate.
 | 
			
		||||
 *
 | 
			
		||||
 * Reap page buffers containing data after offset in page.
 | 
			
		||||
 *
 | 
			
		||||
 * Reap page buffers containing data after offset in page. Can return -EBUSY
 | 
			
		||||
 * if buffers are part of the committing transaction and the page is straddling
 | 
			
		||||
 * i_size. Caller then has to wait for current commit and try again.
 | 
			
		||||
 */
 | 
			
		||||
void jbd2_journal_invalidatepage(journal_t *journal,
 | 
			
		||||
		      struct page *page,
 | 
			
		||||
		      unsigned long offset)
 | 
			
		||||
int jbd2_journal_invalidatepage(journal_t *journal,
 | 
			
		||||
				struct page *page,
 | 
			
		||||
				unsigned long offset)
 | 
			
		||||
{
 | 
			
		||||
	struct buffer_head *head, *bh, *next;
 | 
			
		||||
	unsigned int curr_off = 0;
 | 
			
		||||
	int may_free = 1;
 | 
			
		||||
	int ret = 0;
 | 
			
		||||
 | 
			
		||||
	if (!PageLocked(page))
 | 
			
		||||
		BUG();
 | 
			
		||||
	if (!page_has_buffers(page))
 | 
			
		||||
		return;
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	/* We will potentially be playing with lists other than just the
 | 
			
		||||
	 * data lists (especially for journaled data mode), so be
 | 
			
		||||
| 
						 | 
				
			
			@ -2031,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal,
 | 
			
		|||
		if (offset <= curr_off) {
 | 
			
		||||
			/* This block is wholly outside the truncation point */
 | 
			
		||||
			lock_buffer(bh);
 | 
			
		||||
			may_free &= journal_unmap_buffer(journal, bh,
 | 
			
		||||
							 offset > 0);
 | 
			
		||||
			ret = journal_unmap_buffer(journal, bh, offset > 0);
 | 
			
		||||
			unlock_buffer(bh);
 | 
			
		||||
			if (ret < 0)
 | 
			
		||||
				return ret;
 | 
			
		||||
			may_free &= ret;
 | 
			
		||||
		}
 | 
			
		||||
		curr_off = next_off;
 | 
			
		||||
		bh = next;
 | 
			
		||||
| 
						 | 
				
			
			@ -2044,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal,
 | 
			
		|||
		if (may_free && try_to_free_buffers(page))
 | 
			
		||||
			J_ASSERT(!page_has_buffers(page));
 | 
			
		||||
	}
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1098,7 +1098,7 @@ void		 jbd2_journal_set_triggers(struct buffer_head *,
 | 
			
		|||
extern int	 jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
 | 
			
		||||
extern int	 jbd2_journal_forget (handle_t *, struct buffer_head *);
 | 
			
		||||
extern void	 journal_sync_buffer (struct buffer_head *);
 | 
			
		||||
extern void	 jbd2_journal_invalidatepage(journal_t *,
 | 
			
		||||
extern int	 jbd2_journal_invalidatepage(journal_t *,
 | 
			
		||||
				struct page *, unsigned long);
 | 
			
		||||
extern int	 jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
 | 
			
		||||
extern int	 jbd2_journal_stop(handle_t *);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue