mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	gfs2: iomap direct I/O support
The page unmapping previously done in gfs2_direct_IO is now done generically in iomap_dio_rw. Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Reviewed-by: Bob Peterson <rpeterso@redhat.com>
This commit is contained in:
		
							parent
							
								
									bcfe94139a
								
							
						
					
					
						commit
						967bcc91b0
					
				
					 3 changed files with 136 additions and 110 deletions
				
			
		
							
								
								
									
										100
									
								
								fs/gfs2/aops.c
									
									
									
									
									
								
							
							
						
						
									
										100
									
								
								fs/gfs2/aops.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -84,12 +84,6 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
 | 
			
		|||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
 | 
			
		||||
				 struct buffer_head *bh_result, int create)
 | 
			
		||||
{
 | 
			
		||||
	return gfs2_block_map(inode, lblock, bh_result, 0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * gfs2_writepage_common - Common bits of writepage
 | 
			
		||||
 * @page: The page to be written
 | 
			
		||||
| 
						 | 
				
			
			@ -1024,96 +1018,6 @@ static void gfs2_invalidatepage(struct page *page, unsigned int offset,
 | 
			
		|||
		try_to_release_page(page, 0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * gfs2_ok_for_dio - check that dio is valid on this file
 | 
			
		||||
 * @ip: The inode
 | 
			
		||||
 * @offset: The offset at which we are reading or writing
 | 
			
		||||
 *
 | 
			
		||||
 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
 | 
			
		||||
 *          1 (to accept the i/o request)
 | 
			
		||||
 */
 | 
			
		||||
static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset)
 | 
			
		||||
{
 | 
			
		||||
	/*
 | 
			
		||||
	 * Should we return an error here? I can't see that O_DIRECT for
 | 
			
		||||
	 * a stuffed file makes any sense. For now we'll silently fall
 | 
			
		||||
	 * back to buffered I/O
 | 
			
		||||
	 */
 | 
			
		||||
	if (gfs2_is_stuffed(ip))
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	if (offset >= i_size_read(&ip->i_inode))
 | 
			
		||||
		return 0;
 | 
			
		||||
	return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 | 
			
		||||
{
 | 
			
		||||
	struct file *file = iocb->ki_filp;
 | 
			
		||||
	struct inode *inode = file->f_mapping->host;
 | 
			
		||||
	struct address_space *mapping = inode->i_mapping;
 | 
			
		||||
	struct gfs2_inode *ip = GFS2_I(inode);
 | 
			
		||||
	loff_t offset = iocb->ki_pos;
 | 
			
		||||
	struct gfs2_holder gh;
 | 
			
		||||
	int rv;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Deferred lock, even if it's a write, since we do no allocation
 | 
			
		||||
	 * on this path. All we need change is atime, and this lock mode
 | 
			
		||||
	 * ensures that other nodes have flushed their buffered read caches
 | 
			
		||||
	 * (i.e. their page cache entries for this inode). We do not,
 | 
			
		||||
	 * unfortunately have the option of only flushing a range like
 | 
			
		||||
	 * the VFS does.
 | 
			
		||||
	 */
 | 
			
		||||
	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
 | 
			
		||||
	rv = gfs2_glock_nq(&gh);
 | 
			
		||||
	if (rv)
 | 
			
		||||
		goto out_uninit;
 | 
			
		||||
	rv = gfs2_ok_for_dio(ip, offset);
 | 
			
		||||
	if (rv != 1)
 | 
			
		||||
		goto out; /* dio not valid, fall back to buffered i/o */
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Now since we are holding a deferred (CW) lock at this point, you
 | 
			
		||||
	 * might be wondering why this is ever needed. There is a case however
 | 
			
		||||
	 * where we've granted a deferred local lock against a cached exclusive
 | 
			
		||||
	 * glock. That is ok provided all granted local locks are deferred, but
 | 
			
		||||
	 * it also means that it is possible to encounter pages which are
 | 
			
		||||
	 * cached and possibly also mapped. So here we check for that and sort
 | 
			
		||||
	 * them out ahead of the dio. The glock state machine will take care of
 | 
			
		||||
	 * everything else.
 | 
			
		||||
	 *
 | 
			
		||||
	 * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
 | 
			
		||||
	 * the first place, mapping->nr_pages will always be zero.
 | 
			
		||||
	 */
 | 
			
		||||
	if (mapping->nrpages) {
 | 
			
		||||
		loff_t lstart = offset & ~(PAGE_SIZE - 1);
 | 
			
		||||
		loff_t len = iov_iter_count(iter);
 | 
			
		||||
		loff_t end = PAGE_ALIGN(offset + len) - 1;
 | 
			
		||||
 | 
			
		||||
		rv = 0;
 | 
			
		||||
		if (len == 0)
 | 
			
		||||
			goto out;
 | 
			
		||||
		if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
 | 
			
		||||
			unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
 | 
			
		||||
		rv = filemap_write_and_wait_range(mapping, lstart, end);
 | 
			
		||||
		if (rv)
 | 
			
		||||
			goto out;
 | 
			
		||||
		if (iov_iter_rw(iter) == WRITE)
 | 
			
		||||
			truncate_inode_pages_range(mapping, lstart, end);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
 | 
			
		||||
				  gfs2_get_block_direct, NULL, NULL, 0);
 | 
			
		||||
out:
 | 
			
		||||
	gfs2_glock_dq(&gh);
 | 
			
		||||
out_uninit:
 | 
			
		||||
	gfs2_holder_uninit(&gh);
 | 
			
		||||
	return rv;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * gfs2_releasepage - free the metadata associated with a page
 | 
			
		||||
 * @page: the page that's being released
 | 
			
		||||
| 
						 | 
				
			
			@ -1194,7 +1098,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
 | 
			
		|||
	.bmap = gfs2_bmap,
 | 
			
		||||
	.invalidatepage = gfs2_invalidatepage,
 | 
			
		||||
	.releasepage = gfs2_releasepage,
 | 
			
		||||
	.direct_IO = gfs2_direct_IO,
 | 
			
		||||
	.direct_IO = noop_direct_IO,
 | 
			
		||||
	.migratepage = buffer_migrate_page,
 | 
			
		||||
	.is_partially_uptodate = block_is_partially_uptodate,
 | 
			
		||||
	.error_remove_page = generic_error_remove_page,
 | 
			
		||||
| 
						 | 
				
			
			@ -1211,7 +1115,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
 | 
			
		|||
	.bmap = gfs2_bmap,
 | 
			
		||||
	.invalidatepage = gfs2_invalidatepage,
 | 
			
		||||
	.releasepage = gfs2_releasepage,
 | 
			
		||||
	.direct_IO = gfs2_direct_IO,
 | 
			
		||||
	.direct_IO = noop_direct_IO,
 | 
			
		||||
	.migratepage = buffer_migrate_page,
 | 
			
		||||
	.is_partially_uptodate = block_is_partially_uptodate,
 | 
			
		||||
	.error_remove_page = generic_error_remove_page,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -915,6 +915,9 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
 | 
			
		|||
	} else if (flags & IOMAP_WRITE) {
 | 
			
		||||
		u64 alloc_size;
 | 
			
		||||
 | 
			
		||||
		if (flags & IOMAP_DIRECT)
 | 
			
		||||
			goto out;  /* (see gfs2_file_direct_write) */
 | 
			
		||||
 | 
			
		||||
		len = gfs2_alloc_size(inode, mp, len);
 | 
			
		||||
		alloc_size = len << inode->i_blkbits;
 | 
			
		||||
		if (alloc_size < iomap->length)
 | 
			
		||||
| 
						 | 
				
			
			@ -1082,11 +1085,18 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 | 
			
		|||
	int ret;
 | 
			
		||||
 | 
			
		||||
	trace_gfs2_iomap_start(ip, pos, length, flags);
 | 
			
		||||
	if (flags & IOMAP_WRITE) {
 | 
			
		||||
	if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) {
 | 
			
		||||
		ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap);
 | 
			
		||||
	} else {
 | 
			
		||||
		ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
 | 
			
		||||
		release_metapath(&mp);
 | 
			
		||||
		/*
 | 
			
		||||
		 * Silently fall back to buffered I/O for stuffed files or if
 | 
			
		||||
		 * we've hit a hole (see gfs2_file_direct_write).
 | 
			
		||||
		 */
 | 
			
		||||
		if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) &&
 | 
			
		||||
		    iomap->type != IOMAP_MAPPED)
 | 
			
		||||
			ret = -ENOTBLK;
 | 
			
		||||
	}
 | 
			
		||||
	trace_gfs2_iomap_end(ip, iomap, ret);
 | 
			
		||||
	return ret;
 | 
			
		||||
| 
						 | 
				
			
			@ -1100,7 +1110,7 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
 | 
			
		|||
	struct gfs2_trans *tr = current->journal_info;
 | 
			
		||||
	struct buffer_head *dibh = iomap->private;
 | 
			
		||||
 | 
			
		||||
	if (!(flags & IOMAP_WRITE))
 | 
			
		||||
	if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
 | 
			
		||||
		goto out;
 | 
			
		||||
 | 
			
		||||
	if (iomap->type != IOMAP_INLINE) {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										130
									
								
								fs/gfs2/file.c
									
									
									
									
									
								
							
							
						
						
									
										130
									
								
								fs/gfs2/file.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -690,6 +690,85 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
 | 
			
		|||
	return ret ? ret : ret1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to)
 | 
			
		||||
{
 | 
			
		||||
	struct file *file = iocb->ki_filp;
 | 
			
		||||
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
 | 
			
		||||
	size_t count = iov_iter_count(to);
 | 
			
		||||
	struct gfs2_holder gh;
 | 
			
		||||
	ssize_t ret;
 | 
			
		||||
 | 
			
		||||
	if (!count)
 | 
			
		||||
		return 0; /* skip atime */
 | 
			
		||||
 | 
			
		||||
	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
 | 
			
		||||
	ret = gfs2_glock_nq(&gh);
 | 
			
		||||
	if (ret)
 | 
			
		||||
		goto out_uninit;
 | 
			
		||||
 | 
			
		||||
	/* fall back to buffered I/O for stuffed files */
 | 
			
		||||
	ret = -ENOTBLK;
 | 
			
		||||
	if (gfs2_is_stuffed(ip))
 | 
			
		||||
		goto out;
 | 
			
		||||
 | 
			
		||||
	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL);
 | 
			
		||||
 | 
			
		||||
out:
 | 
			
		||||
	gfs2_glock_dq(&gh);
 | 
			
		||||
out_uninit:
 | 
			
		||||
	gfs2_holder_uninit(&gh);
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
 | 
			
		||||
{
 | 
			
		||||
	struct file *file = iocb->ki_filp;
 | 
			
		||||
	struct inode *inode = file->f_mapping->host;
 | 
			
		||||
	struct gfs2_inode *ip = GFS2_I(inode);
 | 
			
		||||
	size_t len = iov_iter_count(from);
 | 
			
		||||
	loff_t offset = iocb->ki_pos;
 | 
			
		||||
	struct gfs2_holder gh;
 | 
			
		||||
	ssize_t ret;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Deferred lock, even if it's a write, since we do no allocation on
 | 
			
		||||
	 * this path. All we need to change is the atime, and this lock mode
 | 
			
		||||
	 * ensures that other nodes have flushed their buffered read caches
 | 
			
		||||
	 * (i.e. their page cache entries for this inode). We do not,
 | 
			
		||||
	 * unfortunately, have the option of only flushing a range like the
 | 
			
		||||
	 * VFS does.
 | 
			
		||||
	 */
 | 
			
		||||
	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
 | 
			
		||||
	ret = gfs2_glock_nq(&gh);
 | 
			
		||||
	if (ret)
 | 
			
		||||
		goto out_uninit;
 | 
			
		||||
 | 
			
		||||
	/* Silently fall back to buffered I/O when writing beyond EOF */
 | 
			
		||||
	if (offset + len > i_size_read(&ip->i_inode))
 | 
			
		||||
		goto out;
 | 
			
		||||
 | 
			
		||||
	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL);
 | 
			
		||||
 | 
			
		||||
out:
 | 
			
		||||
	gfs2_glock_dq(&gh);
 | 
			
		||||
out_uninit:
 | 
			
		||||
	gfs2_holder_uninit(&gh);
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 | 
			
		||||
{
 | 
			
		||||
	ssize_t ret;
 | 
			
		||||
 | 
			
		||||
	if (iocb->ki_flags & IOCB_DIRECT) {
 | 
			
		||||
		ret = gfs2_file_direct_read(iocb, to);
 | 
			
		||||
		if (likely(ret != -ENOTBLK))
 | 
			
		||||
			return ret;
 | 
			
		||||
		iocb->ki_flags &= ~IOCB_DIRECT;
 | 
			
		||||
	}
 | 
			
		||||
	return generic_file_read_iter(iocb, to);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * gfs2_file_write_iter - Perform a write to a file
 | 
			
		||||
 * @iocb: The io context
 | 
			
		||||
| 
						 | 
				
			
			@ -707,7 +786,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 | 
			
		|||
	struct file *file = iocb->ki_filp;
 | 
			
		||||
	struct inode *inode = file_inode(file);
 | 
			
		||||
	struct gfs2_inode *ip = GFS2_I(inode);
 | 
			
		||||
	ssize_t ret;
 | 
			
		||||
	ssize_t written = 0, ret;
 | 
			
		||||
 | 
			
		||||
	ret = gfs2_rsqa_alloc(ip);
 | 
			
		||||
	if (ret)
 | 
			
		||||
| 
						 | 
				
			
			@ -724,9 +803,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 | 
			
		|||
		gfs2_glock_dq_uninit(&gh);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (iocb->ki_flags & IOCB_DIRECT)
 | 
			
		||||
		return generic_file_write_iter(iocb, from);
 | 
			
		||||
 | 
			
		||||
	inode_lock(inode);
 | 
			
		||||
	ret = generic_write_checks(iocb, from);
 | 
			
		||||
	if (ret <= 0)
 | 
			
		||||
| 
						 | 
				
			
			@ -743,19 +819,55 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 | 
			
		|||
	if (ret)
 | 
			
		||||
		goto out2;
 | 
			
		||||
 | 
			
		||||
	if (iocb->ki_flags & IOCB_DIRECT) {
 | 
			
		||||
		struct address_space *mapping = file->f_mapping;
 | 
			
		||||
		loff_t pos, endbyte;
 | 
			
		||||
		ssize_t buffered;
 | 
			
		||||
 | 
			
		||||
		written = gfs2_file_direct_write(iocb, from);
 | 
			
		||||
		if (written < 0 || !iov_iter_count(from))
 | 
			
		||||
			goto out2;
 | 
			
		||||
 | 
			
		||||
		ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
 | 
			
		||||
		if (unlikely(ret < 0))
 | 
			
		||||
			goto out2;
 | 
			
		||||
		buffered = ret;
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * We need to ensure that the page cache pages are written to
 | 
			
		||||
		 * disk and invalidated to preserve the expected O_DIRECT
 | 
			
		||||
		 * semantics.
 | 
			
		||||
		 */
 | 
			
		||||
		pos = iocb->ki_pos;
 | 
			
		||||
		endbyte = pos + buffered - 1;
 | 
			
		||||
		ret = filemap_write_and_wait_range(mapping, pos, endbyte);
 | 
			
		||||
		if (!ret) {
 | 
			
		||||
			iocb->ki_pos += buffered;
 | 
			
		||||
			written += buffered;
 | 
			
		||||
			invalidate_mapping_pages(mapping,
 | 
			
		||||
						 pos >> PAGE_SHIFT,
 | 
			
		||||
						 endbyte >> PAGE_SHIFT);
 | 
			
		||||
		} else {
 | 
			
		||||
			/*
 | 
			
		||||
			 * We don't know how much we wrote, so just return
 | 
			
		||||
			 * the number of bytes which were direct-written
 | 
			
		||||
			 */
 | 
			
		||||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
		ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
 | 
			
		||||
		if (likely(ret > 0))
 | 
			
		||||
			iocb->ki_pos += ret;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
out2:
 | 
			
		||||
	current->backing_dev_info = NULL;
 | 
			
		||||
out:
 | 
			
		||||
	inode_unlock(inode);
 | 
			
		||||
	if (likely(ret > 0)) {
 | 
			
		||||
		iocb->ki_pos += ret;
 | 
			
		||||
 | 
			
		||||
		/* Handle various SYNC-type writes */
 | 
			
		||||
		ret = generic_write_sync(iocb, ret);
 | 
			
		||||
	}
 | 
			
		||||
	return ret;
 | 
			
		||||
	return written ? written : ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 | 
			
		||||
| 
						 | 
				
			
			@ -1157,7 +1269,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
 | 
			
		|||
 | 
			
		||||
const struct file_operations gfs2_file_fops = {
 | 
			
		||||
	.llseek		= gfs2_llseek,
 | 
			
		||||
	.read_iter	= generic_file_read_iter,
 | 
			
		||||
	.read_iter	= gfs2_file_read_iter,
 | 
			
		||||
	.write_iter	= gfs2_file_write_iter,
 | 
			
		||||
	.unlocked_ioctl	= gfs2_ioctl,
 | 
			
		||||
	.mmap		= gfs2_mmap,
 | 
			
		||||
| 
						 | 
				
			
			@ -1187,7 +1299,7 @@ const struct file_operations gfs2_dir_fops = {
 | 
			
		|||
 | 
			
		||||
const struct file_operations gfs2_file_fops_nolock = {
 | 
			
		||||
	.llseek		= gfs2_llseek,
 | 
			
		||||
	.read_iter	= generic_file_read_iter,
 | 
			
		||||
	.read_iter	= gfs2_file_read_iter,
 | 
			
		||||
	.write_iter	= gfs2_file_write_iter,
 | 
			
		||||
	.unlocked_ioctl	= gfs2_ioctl,
 | 
			
		||||
	.mmap		= gfs2_mmap,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue