forked from mirrors/linux
		
	ext4: punch_hole should wait for DIO writers
punch_hole is the place where we have to wait for all existing writers (writeback, aio, dio), but currently we simply flush pending end_io requests, which is not sufficient. Another issue is that punch_hole is performed without i_mutex held, which obviously can result in dangerous data corruption due to write-after-free. This patch makes the following changes: - Guard punch_hole with i_mutex - Recheck inode flags under i_mutex - Block all new dio readers in order to prevent an information leak caused by the read-after-free pattern. - punch_hole now waits for all writers in flight NOTE: XXX a write-after-free race is still possible because new dirty pages may appear due to mmap(), and currently there is no easy way to stop writeback while punch_hole is in progress. [ Fixed error return from ext4_ext_punch_hole() to make sure that we release i_mutex before returning EPERM or ETXTBSY -- Ted ] Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
		
							parent
							
								
									1f555cfa29
								
							
						
					
					
						commit
						02d262dffc
					
				
					 1 changed files with 36 additions and 17 deletions
				
			
		| 
						 | 
					@ -4794,9 +4794,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 | 
				
			||||||
	loff_t first_page_offset, last_page_offset;
 | 
						loff_t first_page_offset, last_page_offset;
 | 
				
			||||||
	int credits, err = 0;
 | 
						int credits, err = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Write out all dirty pages to avoid race conditions
 | 
				
			||||||
 | 
						 * Then release them.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
 | 
				
			||||||
 | 
							err = filemap_write_and_wait_range(mapping,
 | 
				
			||||||
 | 
								offset, offset + length - 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (err)
 | 
				
			||||||
 | 
								return err;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						mutex_lock(&inode->i_mutex);
 | 
				
			||||||
 | 
						/* It's not possible to punch a hole in an append-only or immutable file */
 | 
				
			||||||
 | 
						if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
 | 
				
			||||||
 | 
							err = -EPERM;
 | 
				
			||||||
 | 
							goto out_mutex;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if (IS_SWAPFILE(inode)) {
 | 
				
			||||||
 | 
							err = -ETXTBSY;
 | 
				
			||||||
 | 
							goto out_mutex;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* No need to punch hole beyond i_size */
 | 
						/* No need to punch hole beyond i_size */
 | 
				
			||||||
	if (offset >= inode->i_size)
 | 
						if (offset >= inode->i_size)
 | 
				
			||||||
		return 0;
 | 
							goto out_mutex;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * If the hole extends beyond i_size, set the hole
 | 
						 * If the hole extends beyond i_size, set the hole
 | 
				
			||||||
| 
						 | 
					@ -4814,33 +4837,25 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 | 
				
			||||||
	first_page_offset = first_page << PAGE_CACHE_SHIFT;
 | 
						first_page_offset = first_page << PAGE_CACHE_SHIFT;
 | 
				
			||||||
	last_page_offset = last_page << PAGE_CACHE_SHIFT;
 | 
						last_page_offset = last_page << PAGE_CACHE_SHIFT;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * Write out all dirty pages to avoid race conditions
 | 
					 | 
				
			||||||
	 * Then release them.
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
 | 
					 | 
				
			||||||
		err = filemap_write_and_wait_range(mapping,
 | 
					 | 
				
			||||||
			offset, offset + length - 1);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		if (err)
 | 
					 | 
				
			||||||
			return err;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* Now release the pages */
 | 
						/* Now release the pages */
 | 
				
			||||||
	if (last_page_offset > first_page_offset) {
 | 
						if (last_page_offset > first_page_offset) {
 | 
				
			||||||
		truncate_pagecache_range(inode, first_page_offset,
 | 
							truncate_pagecache_range(inode, first_page_offset,
 | 
				
			||||||
					 last_page_offset - 1);
 | 
										 last_page_offset - 1);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* finish any pending end_io work */
 | 
						/* Wait for all existing dio workers; newcomers will block on i_mutex */
 | 
				
			||||||
 | 
						ext4_inode_block_unlocked_dio(inode);
 | 
				
			||||||
 | 
						inode_dio_wait(inode);
 | 
				
			||||||
	err = ext4_flush_completed_IO(inode);
 | 
						err = ext4_flush_completed_IO(inode);
 | 
				
			||||||
	if (err)
 | 
						if (err)
 | 
				
			||||||
		return err;
 | 
							goto out_dio;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	credits = ext4_writepage_trans_blocks(inode);
 | 
						credits = ext4_writepage_trans_blocks(inode);
 | 
				
			||||||
	handle = ext4_journal_start(inode, credits);
 | 
						handle = ext4_journal_start(inode, credits);
 | 
				
			||||||
	if (IS_ERR(handle))
 | 
						if (IS_ERR(handle)) {
 | 
				
			||||||
		return PTR_ERR(handle);
 | 
							err = PTR_ERR(handle);
 | 
				
			||||||
 | 
							goto out_dio;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
| 
						 | 
					@ -4930,6 +4945,10 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 | 
				
			||||||
	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 | 
						inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 | 
				
			||||||
	ext4_mark_inode_dirty(handle, inode);
 | 
						ext4_mark_inode_dirty(handle, inode);
 | 
				
			||||||
	ext4_journal_stop(handle);
 | 
						ext4_journal_stop(handle);
 | 
				
			||||||
 | 
					out_dio:
 | 
				
			||||||
 | 
						ext4_inode_resume_unlocked_dio(inode);
 | 
				
			||||||
 | 
					out_mutex:
 | 
				
			||||||
 | 
						mutex_unlock(&inode->i_mutex);
 | 
				
			||||||
	return err;
 | 
						return err;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 | 
					int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue