Mirror of https://github.com/torvalds/linux.git
	writeback: Refactor writeback_single_inode()
The code in writeback_single_inode() is relatively complex. The list requeueing logic makes sense only for the flusher thread, not really for sync_inode() or write_inode_now() callers. Also, when we want to get rid of inode references held by the flusher thread, we will need special I_SYNC handling there.

So separate the part of writeback_single_inode() which does the real writeback work into __writeback_single_inode(), and make writeback_single_inode() do only the work necessary for callers writing a single inode, moving the special list handling into writeback_sb_inodes(). As a side effect this fixes a possible race where we could skip an inode during sync(2) because another writer refiled it from b_io to the b_dirty list. Also, I_SYNC handling is moved into the callers of __writeback_single_inode() to make locking easier.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
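The commit message's key point, that I_SYNC handling moves into the callers of __writeback_single_inode(), can be illustrated with a minimal sketch of the resulting single-inode path (the shape of the new writeback_single_inode()). This is a simplified userspace model, not the kernel code: the struct layouts, flag values, and stub helpers are invented for illustration, and the kernel's spinlocks are reduced to comments.

	#include <stdio.h>

	#define I_DIRTY     0x1
	#define I_SYNC      0x2
	#define WB_SYNC_ALL 1

	struct inode { unsigned i_state; };
	struct writeback_control { int sync_mode; };

	/* Stub for the real worker, which writes dirty pages and metadata
	 * and expects I_SYNC to already be set by its caller. */
	static int __writeback_single_inode(struct inode *inode,
					    struct writeback_control *wbc)
	{
		inode->i_state &= ~I_DIRTY;	/* pretend all pages were written */
		return 0;
	}

	/* Shape of the refactored writeback_single_inode(): check for a
	 * writeback already in flight, skip clean inodes without touching
	 * the writeback lists, and own the I_SYNC set/clear handshake. */
	static int writeback_single_inode(struct inode *inode,
					  struct writeback_control *wbc)
	{
		int ret = 0;

		/* kernel: spin_lock(&inode->i_lock) */
		if (inode->i_state & I_SYNC) {
			if (wbc->sync_mode != WB_SYNC_ALL)
				goto out;	/* best effort only: skip busy inode */
			/* kernel: a data-integrity sync waits for I_SYNC to clear */
		}
		if (!(inode->i_state & I_DIRTY))
			goto out;		/* clean: leave writeback lists alone */

		inode->i_state |= I_SYNC;	/* claim the inode */
		/* kernel: drop i_lock around the actual I/O */
		ret = __writeback_single_inode(inode, wbc);
		/* kernel: retake wb->list_lock + i_lock, delist if now clean */
		inode->i_state &= ~I_SYNC;	/* kernel: inode_sync_complete() */
	out:
		/* kernel: spin_unlock(&inode->i_lock) */
		return ret;
	}

	int main(void)
	{
		struct inode inode = { .i_state = I_DIRTY };
		struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL };

		printf("ret=%d i_state=%#x\n",
		       writeback_single_inode(&inode, &wbc), inode.i_state);
		return 0;
	}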
Parent: f0d07b7ffd
Commit: 4f8ad655db

1 changed file, fs/fs-writeback.c, with 86 additions and 56 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
@@ -364,6 +364,15 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
 	    (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
 		inode->dirtied_when = jiffies;
 
+	if (wbc->pages_skipped) {
+		/*
+		 * writeback is not making progress due to locked
+		 * buffers. Skip this inode for now.
+		 */
+		redirty_tail(inode, wb);
+		return;
+	}
+
 	if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
 		/*
 		 * We didn't write back all the pages.  nfs_writepages()
@@ -396,46 +405,20 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
 }
 
 /*
- * Write out an inode's dirty pages.  Called under wb->list_lock and
- * inode->i_lock.  Either the caller has an active reference on the inode or
- * the inode has I_WILL_FREE set.
- *
- * If `wait' is set, wait on the writeout.
- *
- * The whole writeout design is quite complex and fragile.  We want to avoid
- * starvation of particular inodes when others are being redirtied, prevent
- * livelocks, etc.
+ * Write out an inode and its dirty pages. Do not update the writeback list
+ * linkage. That is left to the caller. The caller is also responsible for
+ * setting I_SYNC flag and calling inode_sync_complete() to clear it.
  */
 static int
-writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
-		       struct writeback_control *wbc)
+__writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
+			 struct writeback_control *wbc)
 {
 	struct address_space *mapping = inode->i_mapping;
 	long nr_to_write = wbc->nr_to_write;
 	unsigned dirty;
 	int ret;
 
-	assert_spin_locked(&inode->i_lock);
-
-	if (!atomic_read(&inode->i_count))
-		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
-	else
-		WARN_ON(inode->i_state & I_WILL_FREE);
-
-	if (inode->i_state & I_SYNC) {
-		if (wbc->sync_mode != WB_SYNC_ALL)
-			return 0;
-		/*
-		 * It's a data-integrity sync.  We must wait.
-		 */
-		inode_wait_for_writeback(inode);
-	}
-
-	BUG_ON(inode->i_state & I_SYNC);
-
-	/* Set I_SYNC, reset I_DIRTY_PAGES */
-	inode->i_state |= I_SYNC;
-	spin_unlock(&inode->i_lock);
+	WARN_ON(!(inode->i_state & I_SYNC));
 
 	ret = do_writepages(mapping, wbc);
 
@@ -468,12 +451,65 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 		if (ret == 0)
 			ret = err;
 	}
+	trace_writeback_single_inode(inode, wbc, nr_to_write);
+	return ret;
+}
+
+/*
+ * Write out an inode's dirty pages. Either the caller has an active reference
+ * on the inode or the inode has I_WILL_FREE set.
+ *
+ * This function is designed to be called for writing back one inode which
+ * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
+ * and does more profound writeback list handling in writeback_sb_inodes().
+ */
+static int
+writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
+		       struct writeback_control *wbc)
+{
+	int ret = 0;
+
+	spin_lock(&inode->i_lock);
+	if (!atomic_read(&inode->i_count))
+		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
+	else
+		WARN_ON(inode->i_state & I_WILL_FREE);
+
+	if (inode->i_state & I_SYNC) {
+		if (wbc->sync_mode != WB_SYNC_ALL)
+			goto out;
+		/*
+		 * It's a data-integrity sync.  We must wait.
+		 */
+		inode_wait_for_writeback(inode);
+	}
+	WARN_ON(inode->i_state & I_SYNC);
+	/*
+	 * Skip inode if it is clean. We don't want to mess with writeback
+	 * lists in this function since flusher thread may be doing for example
+	 * sync in parallel and if we move the inode, it could get skipped. So
+	 * here we make sure inode is on some writeback list and leave it there
+	 * unless we have completely cleaned the inode.
+	 */
+	if (!(inode->i_state & I_DIRTY))
+		goto out;
+	inode->i_state |= I_SYNC;
+	spin_unlock(&inode->i_lock);
+
+	ret = __writeback_single_inode(inode, wb, wbc);
 
 	spin_lock(&wb->list_lock);
 	spin_lock(&inode->i_lock);
-	requeue_inode(inode, wb, wbc);
+	/*
+	 * If inode is clean, remove it from writeback lists. Otherwise don't
+	 * touch it. See comment above for explanation.
+	 */
+	if (!(inode->i_state & I_DIRTY))
+		list_del_init(&inode->i_wb_list);
+	spin_unlock(&wb->list_lock);
 	inode_sync_complete(inode);
-	trace_writeback_single_inode(inode, wbc, nr_to_write);
+out:
+	spin_unlock(&inode->i_lock);
 	return ret;
 }
 
@@ -585,23 +621,29 @@ static long writeback_sb_inodes(struct super_block *sb,
 		spin_unlock(&wb->list_lock);
 
 		__iget(inode);
+		/*
+		 * We already requeued the inode if it had I_SYNC set and we
+		 * are doing WB_SYNC_NONE writeback. So this catches only the
+		 * WB_SYNC_ALL case.
+		 */
+		if (inode->i_state & I_SYNC)
+			inode_wait_for_writeback(inode);
+		inode->i_state |= I_SYNC;
+		spin_unlock(&inode->i_lock);
 		write_chunk = writeback_chunk_size(wb->bdi, work);
 		wbc.nr_to_write = write_chunk;
 		wbc.pages_skipped = 0;
 
-		writeback_single_inode(inode, wb, &wbc);
+		__writeback_single_inode(inode, wb, &wbc);
 
 		work->nr_pages -= write_chunk - wbc.nr_to_write;
 		wrote += write_chunk - wbc.nr_to_write;
+		spin_lock(&wb->list_lock);
+		spin_lock(&inode->i_lock);
 		if (!(inode->i_state & I_DIRTY))
 			wrote++;
-		if (wbc.pages_skipped) {
-			/*
-			 * writeback is not making progress due to locked
-			 * buffers.  Skip this inode for now.
-			 */
-			redirty_tail(inode, wb);
-		}
+		requeue_inode(inode, wb, &wbc);
+		inode_sync_complete(inode);
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&wb->list_lock);
 		iput(inode);
@@ -1337,7 +1379,6 @@ EXPORT_SYMBOL(sync_inodes_sb);
 int write_inode_now(struct inode *inode, int sync)
 {
 	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-	int ret;
 	struct writeback_control wbc = {
 		.nr_to_write = LONG_MAX,
 		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
@@ -1349,11 +1390,7 @@ int write_inode_now(struct inode *inode, int sync)
 		wbc.nr_to_write = 0;
 
 	might_sleep();
-	spin_lock(&inode->i_lock);
-	ret = writeback_single_inode(inode, wb, &wbc);
-	spin_unlock(&inode->i_lock);
-	spin_unlock(&wb->list_lock);
-	return ret;
+	return writeback_single_inode(inode, wb, &wbc);
 }
 EXPORT_SYMBOL(write_inode_now);
 
@@ -1370,14 +1407,7 @@ EXPORT_SYMBOL(write_inode_now);
  */
 int sync_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-	int ret;
-
-	spin_lock(&inode->i_lock);
-	ret = writeback_single_inode(inode, wb, wbc);
-	spin_unlock(&inode->i_lock);
-	spin_unlock(&wb->list_lock);
-	return ret;
+	return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc);
 }
 EXPORT_SYMBOL(sync_inode);
 
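For contrast, here is a similarly simplified sketch of the flusher-thread side after this patch: writeback_sb_inodes() now sets I_SYNC itself, calls __writeback_single_inode(), and then does the list requeueing, including the pages_skipped redirty that the patch moves into requeue_inode(). As before, this is a userspace model with invented types and stub helpers, and the kernel's locking is elided into comments.

	#include <stdio.h>

	#define I_DIRTY 0x1
	#define I_SYNC  0x2

	struct inode { unsigned i_state; };

	static void __writeback_single_inode(struct inode *inode)
	{
		inode->i_state &= ~I_DIRTY;	/* stub: write the pages */
	}

	static void requeue_inode(struct inode *inode)
	{
		/* stub: the kernel version moves the inode between the
		 * b_dirty/b_io/b_more_io lists and, after this patch, also
		 * handles wbc.pages_skipped by calling redirty_tail(). */
		(void)inode;
	}

	/* Shape of the flusher loop in writeback_sb_inodes() after the patch:
	 * the loop owns I_SYNC and all writeback-list handling, so
	 * __writeback_single_inode() can stay lock- and list-agnostic. */
	static void writeback_sb_inodes(struct inode *inodes, int n)
	{
		for (int i = 0; i < n; i++) {
			struct inode *inode = &inodes[i];

			/* kernel: wait here if I_SYNC is set (WB_SYNC_ALL only) */
			inode->i_state |= I_SYNC;
			/* kernel: drop i_lock, do the I/O */
			__writeback_single_inode(inode);
			/* kernel: retake wb->list_lock + i_lock */
			requeue_inode(inode);
			inode->i_state &= ~I_SYNC;	/* inode_sync_complete() */
		}
	}

	int main(void)
	{
		struct inode inodes[2] = { { I_DIRTY }, { I_DIRTY } };

		writeback_sb_inodes(inodes, 2);
		printf("i_state[0]=%#x i_state[1]=%#x\n",
		       inodes[0].i_state, inodes[1].i_state);
		return 0;
	}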