forked from mirrors/linux
		
	ext4: fix races between page faults and hole punching
Currently, page faults and hole punching are completely unsynchronized. This can result in a page fault faulting in a page into a range that we are punching after truncate_pagecache_range() has been called, and thus we can end up with a page mapped to disk blocks that will shortly be freed. Filesystem corruption will shortly follow. Note that the same race is avoided for truncate by checking the page fault offset against i_size, but there isn't a similar mechanism available for punching holes. Fix the problem by creating a new rw semaphore i_mmap_sem in the inode and grabbing it for writing over truncate, hole punching, and other functions removing blocks from the extent tree, and for reading over page faults. We cannot easily use i_data_sem for this since that ranks below transaction start and we need something ranking above it so that it can be held over the whole truncate / hole punching operation. Also remove various workarounds we had in the code to reduce the race window when a page fault could have created pages with stale mapping information. Signed-off-by: Jan Kara <jack@suse.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
		
							parent
							
								
									f41683a204
								
							
						
					
					
						commit
						ea3d7209ca
					
				
					 6 changed files with 127 additions and 42 deletions
				
			
		| 
						 | 
					@ -910,6 +910,15 @@ struct ext4_inode_info {
 | 
				
			||||||
	 * by other means, so we have i_data_sem.
 | 
						 * by other means, so we have i_data_sem.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	struct rw_semaphore i_data_sem;
 | 
						struct rw_semaphore i_data_sem;
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * i_mmap_sem is for serializing page faults with truncate / punch hole
 | 
				
			||||||
 | 
						 * operations. We have to make sure that new page cannot be faulted in
 | 
				
			||||||
 | 
						 * a section of the inode that is being punched. We cannot easily use
 | 
				
			||||||
 | 
						 * i_data_sem for this since we need protection for the whole punch
 | 
				
			||||||
 | 
						 * operation and i_data_sem ranks below transaction start so we have
 | 
				
			||||||
 | 
						 * to occasionally drop it.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						struct rw_semaphore i_mmap_sem;
 | 
				
			||||||
	struct inode vfs_inode;
 | 
						struct inode vfs_inode;
 | 
				
			||||||
	struct jbd2_inode *jinode;
 | 
						struct jbd2_inode *jinode;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2484,6 +2493,7 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 | 
				
			||||||
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
 | 
					extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
 | 
				
			||||||
			     loff_t lstart, loff_t lend);
 | 
								     loff_t lstart, loff_t lend);
 | 
				
			||||||
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 | 
					extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 | 
				
			||||||
 | 
					extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 | 
				
			||||||
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 | 
					extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 | 
				
			||||||
extern void ext4_da_update_reserve_space(struct inode *inode,
 | 
					extern void ext4_da_update_reserve_space(struct inode *inode,
 | 
				
			||||||
					int used, int quota_claim);
 | 
										int used, int quota_claim);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4770,7 +4770,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 | 
				
			||||||
	int partial_begin, partial_end;
 | 
						int partial_begin, partial_end;
 | 
				
			||||||
	loff_t start, end;
 | 
						loff_t start, end;
 | 
				
			||||||
	ext4_lblk_t lblk;
 | 
						ext4_lblk_t lblk;
 | 
				
			||||||
	struct address_space *mapping = inode->i_mapping;
 | 
					 | 
				
			||||||
	unsigned int blkbits = inode->i_blkbits;
 | 
						unsigned int blkbits = inode->i_blkbits;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	trace_ext4_zero_range(inode, offset, len, mode);
 | 
						trace_ext4_zero_range(inode, offset, len, mode);
 | 
				
			||||||
| 
						 | 
					@ -4785,17 +4784,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 | 
				
			||||||
			return ret;
 | 
								return ret;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * Write out all dirty pages to avoid race conditions
 | 
					 | 
				
			||||||
	 * Then release them.
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
 | 
					 | 
				
			||||||
		ret = filemap_write_and_wait_range(mapping, offset,
 | 
					 | 
				
			||||||
						   offset + len - 1);
 | 
					 | 
				
			||||||
		if (ret)
 | 
					 | 
				
			||||||
			return ret;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Round up offset. This is not fallocate, we need to zero out
 | 
						 * Round up offset. This is not fallocate, we need to zero out
 | 
				
			||||||
	 * blocks, so convert interior block aligned part of the range to
 | 
						 * blocks, so convert interior block aligned part of the range to
 | 
				
			||||||
| 
						 | 
					@ -4856,16 +4844,22 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 | 
				
			||||||
		flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
 | 
							flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
 | 
				
			||||||
			  EXT4_EX_NOCACHE);
 | 
								  EXT4_EX_NOCACHE);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/* Now release the pages and zero block aligned part of pages*/
 | 
					 | 
				
			||||||
		truncate_pagecache_range(inode, start, end - 1);
 | 
					 | 
				
			||||||
		inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		/* Wait all existing dio workers, newcomers will block on i_mutex */
 | 
							/* Wait all existing dio workers, newcomers will block on i_mutex */
 | 
				
			||||||
		ext4_inode_block_unlocked_dio(inode);
 | 
							ext4_inode_block_unlocked_dio(inode);
 | 
				
			||||||
		inode_dio_wait(inode);
 | 
							inode_dio_wait(inode);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * Prevent page faults from reinstantiating pages we have
 | 
				
			||||||
 | 
							 * released from page cache.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							down_write(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
 | 
							/* Now release the pages and zero block aligned part of pages */
 | 
				
			||||||
 | 
							truncate_pagecache_range(inode, start, end - 1);
 | 
				
			||||||
 | 
							inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
 | 
							ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
 | 
				
			||||||
					     flags, mode);
 | 
										     flags, mode);
 | 
				
			||||||
 | 
							up_write(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
		if (ret)
 | 
							if (ret)
 | 
				
			||||||
			goto out_dio;
 | 
								goto out_dio;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -5524,17 +5518,22 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 | 
				
			||||||
		goto out_mutex;
 | 
							goto out_mutex;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	truncate_pagecache(inode, ioffset);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* Wait for existing dio to complete */
 | 
						/* Wait for existing dio to complete */
 | 
				
			||||||
	ext4_inode_block_unlocked_dio(inode);
 | 
						ext4_inode_block_unlocked_dio(inode);
 | 
				
			||||||
	inode_dio_wait(inode);
 | 
						inode_dio_wait(inode);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Prevent page faults from reinstantiating pages we have released from
 | 
				
			||||||
 | 
						 * page cache.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						down_write(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
 | 
						truncate_pagecache(inode, ioffset);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	credits = ext4_writepage_trans_blocks(inode);
 | 
						credits = ext4_writepage_trans_blocks(inode);
 | 
				
			||||||
	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
 | 
						handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
 | 
				
			||||||
	if (IS_ERR(handle)) {
 | 
						if (IS_ERR(handle)) {
 | 
				
			||||||
		ret = PTR_ERR(handle);
 | 
							ret = PTR_ERR(handle);
 | 
				
			||||||
		goto out_dio;
 | 
							goto out_mmap;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	down_write(&EXT4_I(inode)->i_data_sem);
 | 
						down_write(&EXT4_I(inode)->i_data_sem);
 | 
				
			||||||
| 
						 | 
					@ -5573,7 +5572,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
out_stop:
 | 
					out_stop:
 | 
				
			||||||
	ext4_journal_stop(handle);
 | 
						ext4_journal_stop(handle);
 | 
				
			||||||
out_dio:
 | 
					out_mmap:
 | 
				
			||||||
 | 
						up_write(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
	ext4_inode_resume_unlocked_dio(inode);
 | 
						ext4_inode_resume_unlocked_dio(inode);
 | 
				
			||||||
out_mutex:
 | 
					out_mutex:
 | 
				
			||||||
	mutex_unlock(&inode->i_mutex);
 | 
						mutex_unlock(&inode->i_mutex);
 | 
				
			||||||
| 
						 | 
					@ -5660,17 +5660,22 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 | 
				
			||||||
		goto out_mutex;
 | 
							goto out_mutex;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	truncate_pagecache(inode, ioffset);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* Wait for existing dio to complete */
 | 
						/* Wait for existing dio to complete */
 | 
				
			||||||
	ext4_inode_block_unlocked_dio(inode);
 | 
						ext4_inode_block_unlocked_dio(inode);
 | 
				
			||||||
	inode_dio_wait(inode);
 | 
						inode_dio_wait(inode);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Prevent page faults from reinstantiating pages we have released from
 | 
				
			||||||
 | 
						 * page cache.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						down_write(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
 | 
						truncate_pagecache(inode, ioffset);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	credits = ext4_writepage_trans_blocks(inode);
 | 
						credits = ext4_writepage_trans_blocks(inode);
 | 
				
			||||||
	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
 | 
						handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
 | 
				
			||||||
	if (IS_ERR(handle)) {
 | 
						if (IS_ERR(handle)) {
 | 
				
			||||||
		ret = PTR_ERR(handle);
 | 
							ret = PTR_ERR(handle);
 | 
				
			||||||
		goto out_dio;
 | 
							goto out_mmap;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Expand file to avoid data loss if there is error while shifting */
 | 
						/* Expand file to avoid data loss if there is error while shifting */
 | 
				
			||||||
| 
						 | 
					@ -5741,7 +5746,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
out_stop:
 | 
					out_stop:
 | 
				
			||||||
	ext4_journal_stop(handle);
 | 
						ext4_journal_stop(handle);
 | 
				
			||||||
out_dio:
 | 
					out_mmap:
 | 
				
			||||||
 | 
						up_write(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
	ext4_inode_resume_unlocked_dio(inode);
 | 
						ext4_inode_resume_unlocked_dio(inode);
 | 
				
			||||||
out_mutex:
 | 
					out_mutex:
 | 
				
			||||||
	mutex_unlock(&inode->i_mutex);
 | 
						mutex_unlock(&inode->i_mutex);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -209,15 +209,18 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int result;
 | 
						int result;
 | 
				
			||||||
	handle_t *handle = NULL;
 | 
						handle_t *handle = NULL;
 | 
				
			||||||
	struct super_block *sb = file_inode(vma->vm_file)->i_sb;
 | 
						struct inode *inode = file_inode(vma->vm_file);
 | 
				
			||||||
 | 
						struct super_block *sb = inode->i_sb;
 | 
				
			||||||
	bool write = vmf->flags & FAULT_FLAG_WRITE;
 | 
						bool write = vmf->flags & FAULT_FLAG_WRITE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (write) {
 | 
						if (write) {
 | 
				
			||||||
		sb_start_pagefault(sb);
 | 
							sb_start_pagefault(sb);
 | 
				
			||||||
		file_update_time(vma->vm_file);
 | 
							file_update_time(vma->vm_file);
 | 
				
			||||||
 | 
							down_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
 | 
							handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
 | 
				
			||||||
						EXT4_DATA_TRANS_BLOCKS(sb));
 | 
											EXT4_DATA_TRANS_BLOCKS(sb));
 | 
				
			||||||
	}
 | 
						} else
 | 
				
			||||||
 | 
							down_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (IS_ERR(handle))
 | 
						if (IS_ERR(handle))
 | 
				
			||||||
		result = VM_FAULT_SIGBUS;
 | 
							result = VM_FAULT_SIGBUS;
 | 
				
			||||||
| 
						 | 
					@ -228,8 +231,10 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 | 
				
			||||||
	if (write) {
 | 
						if (write) {
 | 
				
			||||||
		if (!IS_ERR(handle))
 | 
							if (!IS_ERR(handle))
 | 
				
			||||||
			ext4_journal_stop(handle);
 | 
								ext4_journal_stop(handle);
 | 
				
			||||||
 | 
							up_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
		sb_end_pagefault(sb);
 | 
							sb_end_pagefault(sb);
 | 
				
			||||||
	}
 | 
						} else
 | 
				
			||||||
 | 
							up_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return result;
 | 
						return result;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -246,10 +251,12 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 | 
				
			||||||
	if (write) {
 | 
						if (write) {
 | 
				
			||||||
		sb_start_pagefault(sb);
 | 
							sb_start_pagefault(sb);
 | 
				
			||||||
		file_update_time(vma->vm_file);
 | 
							file_update_time(vma->vm_file);
 | 
				
			||||||
 | 
							down_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
 | 
							handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
 | 
				
			||||||
				ext4_chunk_trans_blocks(inode,
 | 
									ext4_chunk_trans_blocks(inode,
 | 
				
			||||||
							PMD_SIZE / PAGE_SIZE));
 | 
												PMD_SIZE / PAGE_SIZE));
 | 
				
			||||||
	}
 | 
						} else
 | 
				
			||||||
 | 
							down_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (IS_ERR(handle))
 | 
						if (IS_ERR(handle))
 | 
				
			||||||
		result = VM_FAULT_SIGBUS;
 | 
							result = VM_FAULT_SIGBUS;
 | 
				
			||||||
| 
						 | 
					@ -260,30 +267,71 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 | 
				
			||||||
	if (write) {
 | 
						if (write) {
 | 
				
			||||||
		if (!IS_ERR(handle))
 | 
							if (!IS_ERR(handle))
 | 
				
			||||||
			ext4_journal_stop(handle);
 | 
								ext4_journal_stop(handle);
 | 
				
			||||||
 | 
							up_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
		sb_end_pagefault(sb);
 | 
							sb_end_pagefault(sb);
 | 
				
			||||||
	}
 | 
						} else
 | 
				
			||||||
 | 
							up_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return result;
 | 
						return result;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 | 
					static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return dax_mkwrite(vma, vmf, ext4_get_block_dax,
 | 
						int err;
 | 
				
			||||||
				ext4_end_io_unwritten);
 | 
						struct inode *inode = file_inode(vma->vm_file);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sb_start_pagefault(inode->i_sb);
 | 
				
			||||||
 | 
						file_update_time(vma->vm_file);
 | 
				
			||||||
 | 
						down_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
 | 
						err = __dax_mkwrite(vma, vmf, ext4_get_block_dax,
 | 
				
			||||||
 | 
								    ext4_end_io_unwritten);
 | 
				
			||||||
 | 
						up_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
 | 
						sb_end_pagefault(inode->i_sb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return err;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
 | 
				
			||||||
 | 
					 * handler we check for races against truncate. Note that since we cycle through
 | 
				
			||||||
 | 
					 * i_mmap_sem, we are sure that also any hole punching that began before we
 | 
				
			||||||
 | 
					 * were called is finished by now and so if it included part of the file we
 | 
				
			||||||
 | 
					 * are working on, our pte will get unmapped and the check for pte_same() in
 | 
				
			||||||
 | 
					 * wp_pfn_shared() fails. Thus fault gets retried and things work out as
 | 
				
			||||||
 | 
					 * desired.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
 | 
				
			||||||
 | 
									struct vm_fault *vmf)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct inode *inode = file_inode(vma->vm_file);
 | 
				
			||||||
 | 
						struct super_block *sb = inode->i_sb;
 | 
				
			||||||
 | 
						int ret = VM_FAULT_NOPAGE;
 | 
				
			||||||
 | 
						loff_t size;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sb_start_pagefault(sb);
 | 
				
			||||||
 | 
						file_update_time(vma->vm_file);
 | 
				
			||||||
 | 
						down_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
 | 
						size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
 | 
				
			||||||
 | 
						if (vmf->pgoff >= size)
 | 
				
			||||||
 | 
							ret = VM_FAULT_SIGBUS;
 | 
				
			||||||
 | 
						up_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
 | 
						sb_end_pagefault(sb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static const struct vm_operations_struct ext4_dax_vm_ops = {
 | 
					static const struct vm_operations_struct ext4_dax_vm_ops = {
 | 
				
			||||||
	.fault		= ext4_dax_fault,
 | 
						.fault		= ext4_dax_fault,
 | 
				
			||||||
	.pmd_fault	= ext4_dax_pmd_fault,
 | 
						.pmd_fault	= ext4_dax_pmd_fault,
 | 
				
			||||||
	.page_mkwrite	= ext4_dax_mkwrite,
 | 
						.page_mkwrite	= ext4_dax_mkwrite,
 | 
				
			||||||
	.pfn_mkwrite	= dax_pfn_mkwrite,
 | 
						.pfn_mkwrite	= ext4_dax_pfn_mkwrite,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
#define ext4_dax_vm_ops	ext4_file_vm_ops
 | 
					#define ext4_dax_vm_ops	ext4_file_vm_ops
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static const struct vm_operations_struct ext4_file_vm_ops = {
 | 
					static const struct vm_operations_struct ext4_file_vm_ops = {
 | 
				
			||||||
	.fault		= filemap_fault,
 | 
						.fault		= ext4_filemap_fault,
 | 
				
			||||||
	.map_pages	= filemap_map_pages,
 | 
						.map_pages	= filemap_map_pages,
 | 
				
			||||||
	.page_mkwrite   = ext4_page_mkwrite,
 | 
						.page_mkwrite   = ext4_page_mkwrite,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3623,6 +3623,15 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Wait all existing dio workers, newcomers will block on i_mutex */
 | 
				
			||||||
 | 
						ext4_inode_block_unlocked_dio(inode);
 | 
				
			||||||
 | 
						inode_dio_wait(inode);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Prevent page faults from reinstantiating pages we have released from
 | 
				
			||||||
 | 
						 * page cache.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						down_write(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
	first_block_offset = round_up(offset, sb->s_blocksize);
 | 
						first_block_offset = round_up(offset, sb->s_blocksize);
 | 
				
			||||||
	last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
 | 
						last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3631,10 +3640,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 | 
				
			||||||
		truncate_pagecache_range(inode, first_block_offset,
 | 
							truncate_pagecache_range(inode, first_block_offset,
 | 
				
			||||||
					 last_block_offset);
 | 
										 last_block_offset);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Wait all existing dio workers, newcomers will block on i_mutex */
 | 
					 | 
				
			||||||
	ext4_inode_block_unlocked_dio(inode);
 | 
					 | 
				
			||||||
	inode_dio_wait(inode);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 | 
						if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 | 
				
			||||||
		credits = ext4_writepage_trans_blocks(inode);
 | 
							credits = ext4_writepage_trans_blocks(inode);
 | 
				
			||||||
	else
 | 
						else
 | 
				
			||||||
| 
						 | 
					@ -3680,16 +3685,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 | 
				
			||||||
	if (IS_SYNC(inode))
 | 
						if (IS_SYNC(inode))
 | 
				
			||||||
		ext4_handle_sync(handle);
 | 
							ext4_handle_sync(handle);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Now release the pages again to reduce race window */
 | 
					 | 
				
			||||||
	if (last_block_offset > first_block_offset)
 | 
					 | 
				
			||||||
		truncate_pagecache_range(inode, first_block_offset,
 | 
					 | 
				
			||||||
					 last_block_offset);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 | 
						inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 | 
				
			||||||
	ext4_mark_inode_dirty(handle, inode);
 | 
						ext4_mark_inode_dirty(handle, inode);
 | 
				
			||||||
out_stop:
 | 
					out_stop:
 | 
				
			||||||
	ext4_journal_stop(handle);
 | 
						ext4_journal_stop(handle);
 | 
				
			||||||
out_dio:
 | 
					out_dio:
 | 
				
			||||||
 | 
						up_write(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
	ext4_inode_resume_unlocked_dio(inode);
 | 
						ext4_inode_resume_unlocked_dio(inode);
 | 
				
			||||||
out_mutex:
 | 
					out_mutex:
 | 
				
			||||||
	mutex_unlock(&inode->i_mutex);
 | 
						mutex_unlock(&inode->i_mutex);
 | 
				
			||||||
| 
						 | 
					@ -4823,6 +4824,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 | 
				
			||||||
			} else
 | 
								} else
 | 
				
			||||||
				ext4_wait_for_tail_page_commit(inode);
 | 
									ext4_wait_for_tail_page_commit(inode);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
							down_write(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * Truncate pagecache after we've waited for commit
 | 
							 * Truncate pagecache after we've waited for commit
 | 
				
			||||||
		 * in data=journal mode to make pages freeable.
 | 
							 * in data=journal mode to make pages freeable.
 | 
				
			||||||
| 
						 | 
					@ -4830,6 +4832,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 | 
				
			||||||
		truncate_pagecache(inode, inode->i_size);
 | 
							truncate_pagecache(inode, inode->i_size);
 | 
				
			||||||
		if (shrink)
 | 
							if (shrink)
 | 
				
			||||||
			ext4_truncate(inode);
 | 
								ext4_truncate(inode);
 | 
				
			||||||
 | 
							up_write(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!rc) {
 | 
						if (!rc) {
 | 
				
			||||||
| 
						 | 
					@ -5278,6 +5281,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	sb_start_pagefault(inode->i_sb);
 | 
						sb_start_pagefault(inode->i_sb);
 | 
				
			||||||
	file_update_time(vma->vm_file);
 | 
						file_update_time(vma->vm_file);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						down_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
	/* Delalloc case is easy... */
 | 
						/* Delalloc case is easy... */
 | 
				
			||||||
	if (test_opt(inode->i_sb, DELALLOC) &&
 | 
						if (test_opt(inode->i_sb, DELALLOC) &&
 | 
				
			||||||
	    !ext4_should_journal_data(inode) &&
 | 
						    !ext4_should_journal_data(inode) &&
 | 
				
			||||||
| 
						 | 
					@ -5347,6 +5352,19 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 | 
				
			||||||
out_ret:
 | 
					out_ret:
 | 
				
			||||||
	ret = block_page_mkwrite_return(ret);
 | 
						ret = block_page_mkwrite_return(ret);
 | 
				
			||||||
out:
 | 
					out:
 | 
				
			||||||
 | 
						up_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
	sb_end_pagefault(inode->i_sb);
 | 
						sb_end_pagefault(inode->i_sb);
 | 
				
			||||||
	return ret;
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct inode *inode = file_inode(vma->vm_file);
 | 
				
			||||||
 | 
						int err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						down_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
 | 
						err = filemap_fault(vma, vmf);
 | 
				
			||||||
 | 
						up_read(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return err;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -958,6 +958,7 @@ static void init_once(void *foo)
 | 
				
			||||||
	INIT_LIST_HEAD(&ei->i_orphan);
 | 
						INIT_LIST_HEAD(&ei->i_orphan);
 | 
				
			||||||
	init_rwsem(&ei->xattr_sem);
 | 
						init_rwsem(&ei->xattr_sem);
 | 
				
			||||||
	init_rwsem(&ei->i_data_sem);
 | 
						init_rwsem(&ei->i_data_sem);
 | 
				
			||||||
 | 
						init_rwsem(&ei->i_mmap_sem);
 | 
				
			||||||
	inode_init_once(&ei->vfs_inode);
 | 
						inode_init_once(&ei->vfs_inode);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -10,8 +10,10 @@
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static inline void ext4_truncate_failed_write(struct inode *inode)
 | 
					static inline void ext4_truncate_failed_write(struct inode *inode)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						down_write(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
	truncate_inode_pages(inode->i_mapping, inode->i_size);
 | 
						truncate_inode_pages(inode->i_mapping, inode->i_size);
 | 
				
			||||||
	ext4_truncate(inode);
 | 
						ext4_truncate(inode);
 | 
				
			||||||
 | 
						up_write(&EXT4_I(inode)->i_mmap_sem);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue