xfs: implement iomap based buffered write path

Convert XFS to use the new iomap based multipage write path. This
involves implementing the ->iomap_begin and ->iomap_end methods, and
switching the buffered file write, page_mkwrite and xfs_iozero paths
to the new iomap helpers.

With this change __xfs_get_blocks will never be used for buffered
writes, and the code handling them can be removed.

Based on earlier code from Dave Chinner.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>

parent f0c6bcba74
commit 68a9f5e700
7 changed files with 187 additions and 258 deletions
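
For orientation, the iomap interface this commit implements can be sketched as
below. This is a reconstruction from the signatures and field accesses visible
in the diffs that follow, not a verbatim copy; see include/linux/iomap.h in
this tree for the authoritative definitions.

/*
 * Sketch of the iomap interface as consumed by this patch (reconstructed
 * from the uses in the diffs below; not the verbatim header).
 */
struct iomap {
	sector_t		blkno;	/* first sector, or IOMAP_NULL_BLOCK */
	loff_t			offset;	/* file offset of mapping, bytes */
	u64			length;	/* length of mapping, bytes */
	int			type;	/* IOMAP_HOLE, IOMAP_DELALLOC, ... */
	struct block_device	*bdev;	/* block device for I/O */
};

struct iomap_ops {
	/*
	 * Return the existing mapping at offset, or reserve space starting
	 * at offset for up to length bytes, as a single mapping.
	 */
	int (*iomap_begin)(struct inode *inode, loff_t offset, loff_t length,
			unsigned flags, struct iomap *iomap);

	/*
	 * Commit and/or unreserve space previously reserved by iomap_begin;
	 * written is the number of bytes actually written.
	 */
	int (*iomap_end)(struct inode *inode, loff_t offset, loff_t length,
			ssize_t written, unsigned flags, struct iomap *iomap);
};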
				
			
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -4,6 +4,7 @@ config XFS_FS
 	depends on (64BIT || LBDAF)
 	select EXPORTFS
 	select LIBCRC32C
+	select FS_IOMAP
 	help
 	  XFS is a high performance journaling filesystem which originated
 	  on the SGI IRIX platform.  It is completely multi-threaded, can
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1427,216 +1427,6 @@ xfs_vm_direct_IO(
 			xfs_get_blocks_direct, endio, NULL, flags);
 }
 
-/*
- * Punch out the delalloc blocks we have already allocated.
- *
- * Don't bother with xfs_setattr given that nothing can have made it to disk yet
- * as the page is still locked at this point.
- */
-STATIC void
-xfs_vm_kill_delalloc_range(
-	struct inode		*inode,
-	loff_t			start,
-	loff_t			end)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	xfs_fileoff_t		start_fsb;
-	xfs_fileoff_t		end_fsb;
-	int			error;
-
-	start_fsb = XFS_B_TO_FSB(ip->i_mount, start);
-	end_fsb = XFS_B_TO_FSB(ip->i_mount, end);
-	if (end_fsb <= start_fsb)
-		return;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-						end_fsb - start_fsb);
-	if (error) {
-		/* something screwed, just bail */
-		if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-			xfs_alert(ip->i_mount,
-		"xfs_vm_write_failed: unable to clean up ino %lld",
-					ip->i_ino);
-		}
-	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-}
-
-STATIC void
-xfs_vm_write_failed(
-	struct inode		*inode,
-	struct page		*page,
-	loff_t			pos,
-	unsigned		len)
-{
-	loff_t			block_offset;
-	loff_t			block_start;
-	loff_t			block_end;
-	loff_t			from = pos & (PAGE_SIZE - 1);
-	loff_t			to = from + len;
-	struct buffer_head	*bh, *head;
-	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
-
-	/*
-	 * The request pos offset might be 32 or 64 bit, this is all fine
-	 * on 64-bit platform.  However, for 64-bit pos request on 32-bit
-	 * platform, the high 32-bit will be masked off if we evaluate the
-	 * block_offset via (pos & PAGE_MASK) because the PAGE_MASK is
-	 * 0xfffff000 as an unsigned long, hence the result is incorrect
-	 * which could cause the following ASSERT failed in most cases.
-	 * In order to avoid this, we can evaluate the block_offset of the
-	 * start of the page by using shifts rather than masks the mismatch
-	 * problem.
-	 */
-	block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT;
-
-	ASSERT(block_offset + from == pos);
-
-	head = page_buffers(page);
-	block_start = 0;
-	for (bh = head; bh != head || !block_start;
-	     bh = bh->b_this_page, block_start = block_end,
-				   block_offset += bh->b_size) {
-		block_end = block_start + bh->b_size;
-
-		/* skip buffers before the write */
-		if (block_end <= from)
-			continue;
-
-		/* if the buffer is after the write, we're done */
-		if (block_start >= to)
-			break;
-
-		/*
-		 * Process delalloc and unwritten buffers beyond EOF. We can
-		 * encounter unwritten buffers in the event that a file has
-		 * post-EOF unwritten extents and an extending write happens to
-		 * fail (e.g., an unaligned write that also involves a delalloc
-		 * to the same page).
-		 */
-		if (!buffer_delay(bh) && !buffer_unwritten(bh))
-			continue;
-
-		if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
-		    block_offset < i_size_read(inode))
-			continue;
-
-		if (buffer_delay(bh))
-			xfs_vm_kill_delalloc_range(inode, block_offset,
-						   block_offset + bh->b_size);
-
-		/*
-		 * This buffer does not contain data anymore. make sure anyone
-		 * who finds it knows that for certain.
-		 */
-		clear_buffer_delay(bh);
-		clear_buffer_uptodate(bh);
-		clear_buffer_mapped(bh);
-		clear_buffer_new(bh);
-		clear_buffer_dirty(bh);
-		clear_buffer_unwritten(bh);
-	}
-
-}
-
-/*
- * This used to call block_write_begin(), but it unlocks and releases the page
- * on error, and we need that page to be able to punch stale delalloc blocks out
- * on failure. hence we copy-n-waste it here and call xfs_vm_write_failed() at
- * the appropriate point.
- */
-STATIC int
-xfs_vm_write_begin(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		flags,
-	struct page		**pagep,
-	void			**fsdata)
-{
-	pgoff_t			index = pos >> PAGE_SHIFT;
-	struct page		*page;
-	int			status;
-	struct xfs_mount	*mp = XFS_I(mapping->host)->i_mount;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page)
-		return -ENOMEM;
-
-	status = __block_write_begin(page, pos, len, xfs_get_blocks);
-	if (xfs_mp_fail_writes(mp))
-		status = -EIO;
-	if (unlikely(status)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-
-		xfs_vm_write_failed(inode, page, pos, len);
-		unlock_page(page);
-
-		/*
-		 * If the write is beyond EOF, we only want to kill blocks
-		 * allocated in this write, not blocks that were previously
-		 * written successfully.
-		 */
-		if (xfs_mp_fail_writes(mp))
-			isize = 0;
-		if (pos + len > isize) {
-			ssize_t start = max_t(ssize_t, pos, isize);
-
-			truncate_pagecache_range(inode, start, pos + len);
-		}
-
-		put_page(page);
-		page = NULL;
-	}
-
-	*pagep = page;
-	return status;
-}
-
-/*
- * On failure, we only need to kill delalloc blocks beyond EOF in the range of
- * this specific write because they will never be written. Previous writes
- * beyond EOF where block allocation succeeded do not need to be trashed, so
- * only new blocks from this write should be trashed. For blocks within
- * EOF, generic_write_end() zeros them so they are safe to leave alone and be
- * written with all the other valid data.
- */
-STATIC int
-xfs_vm_write_end(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		copied,
-	struct page		*page,
-	void			*fsdata)
-{
-	int			ret;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-	if (unlikely(ret < len)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-		loff_t		to = pos + len;
-
-		if (to > isize) {
-			/* only kill blocks in this write beyond EOF */
-			if (pos > isize)
-				isize = pos;
-			xfs_vm_kill_delalloc_range(inode, isize, to);
-			truncate_pagecache_range(inode, isize, to);
-		}
-	}
-	return ret;
-}
-
 STATIC sector_t
 xfs_vm_bmap(
 	struct address_space	*mapping,
@@ -1747,8 +1537,6 @@ const struct address_space_operations xfs_address_space_operations = {
 	.set_page_dirty		= xfs_vm_set_page_dirty,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
-	.write_begin		= xfs_vm_write_begin,
-	.write_end		= xfs_vm_write_end,
 	.bmap			= xfs_vm_bmap,
 	.direct_IO		= xfs_vm_direct_IO,
 	.migratepage		= buffer_migrate_page,
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -37,6 +37,7 @@
 #include "xfs_log.h"
 #include "xfs_icache.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"
 
 #include <linux/dcache.h>
 #include <linux/falloc.h>
@@ -79,57 +80,27 @@ xfs_rw_ilock_demote(
 		inode_unlock(VFS_I(ip));
 }
 
-/*
- * xfs_iozero clears the specified range supplied via the page cache (except in
- * the DAX case). Writes through the page cache will allocate blocks over holes,
- * though the callers usually map the holes first and avoid them. If a block is
- * not completely zeroed, then it will be read from disk before being partially
- * zeroed.
- *
- * In the DAX case, we can just directly write to the underlying pages. This
- * will not allocate blocks, but will avoid holes and unwritten extents and so
- * not do unnecessary work.
- */
-int
-xfs_iozero(
-	struct xfs_inode	*ip,	/* inode			*/
-	loff_t			pos,	/* offset in file		*/
-	size_t			count)	/* size of data to zero		*/
+static int
+xfs_dax_zero_range(
+	struct inode		*inode,
+	loff_t			pos,
+	size_t			count)
 {
-	struct page		*page;
-	struct address_space	*mapping;
 	int			status = 0;
 
-	mapping = VFS_I(ip)->i_mapping;
 	do {
 		unsigned offset, bytes;
-		void *fsdata;
 
 		offset = (pos & (PAGE_SIZE -1)); /* Within page */
 		bytes = PAGE_SIZE - offset;
 		if (bytes > count)
 			bytes = count;
 
-		if (IS_DAX(VFS_I(ip))) {
-			status = dax_zero_page_range(VFS_I(ip), pos, bytes,
-					     xfs_get_blocks_direct);
-			if (status)
-				break;
-		} else {
-			status = pagecache_write_begin(NULL, mapping, pos, bytes,
-						AOP_FLAG_UNINTERRUPTIBLE,
-						&page, &fsdata);
-			if (status)
-				break;
-
-			zero_user(page, offset, bytes);
-
-			status = pagecache_write_end(NULL, mapping, pos, bytes,
-						bytes, page, fsdata);
-			WARN_ON(status <= 0); /* can't return less than zero! */
-			status = 0;
-		}
+		status = dax_zero_page_range(inode, pos, bytes,
+					     xfs_get_blocks_direct);
+		if (status)
+			break;
+
 		pos += bytes;
 		count -= bytes;
 	} while (count);
@@ -137,6 +108,24 @@ xfs_iozero(
 	return status;
 }
 
+/*
+ * Clear the specified ranges to zero through either the pagecache or DAX.
+ * Holes and unwritten extents will be left as-is as they already are zeroed.
+ */
+int
+xfs_iozero(
+	struct xfs_inode	*ip,
+	loff_t			pos,
+	size_t			count)
+{
+	struct inode		*inode = VFS_I(ip);
+
+	if (IS_DAX(VFS_I(ip)))
+		return xfs_dax_zero_range(inode, pos, count);
+	else
+		return iomap_zero_range(inode, pos, count, NULL, &xfs_iomap_ops);
+}
+
 int
 xfs_update_prealloc_flags(
 	struct xfs_inode	*ip,
@@ -841,7 +830,7 @@ xfs_file_buffered_aio_write(
 write_retry:
 	trace_xfs_file_buffered_write(ip, iov_iter_count(from),
 				      iocb->ki_pos, 0);
-	ret = generic_perform_write(file, from, iocb->ki_pos);
+	ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
 	if (likely(ret >= 0))
 		iocb->ki_pos += ret;
 
@@ -1553,7 +1542,7 @@ xfs_filemap_page_mkwrite(
 	if (IS_DAX(inode)) {
 		ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
 	} else {
-		ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 		ret = block_page_mkwrite_return(ret);
 	}
 
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -967,3 +967,147 @@ xfs_bmbt_to_iomap(
 	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
 	iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
 }
+
+static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
+{
+	return !nimaps ||
+		imap->br_startblock == HOLESTARTBLOCK ||
+		imap->br_startblock == DELAYSTARTBLOCK;
+}
+
+static int
+xfs_file_iomap_begin(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			length,
+	unsigned		flags,
+	struct iomap		*iomap)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_bmbt_irec	imap;
+	xfs_fileoff_t		offset_fsb, end_fsb;
+	int			nimaps = 1, error = 0;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+	ASSERT(offset <= mp->m_super->s_maxbytes);
+	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
+		length = mp->m_super->s_maxbytes - offset;
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+			       &nimaps, XFS_BMAPI_ENTIRE);
+	if (error) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		return error;
+	}
+
+	if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
+		/*
+		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
+		 * pages to keep the chunks of work done where somewhat symmetric
+		 * with the work writeback does. This is a completely arbitrary
+		 * number pulled out of thin air as a best guess for initial
+		 * testing.
+		 *
+		 * Note that the values needs to be less than 32-bits wide until
+		 * the lower level functions are updated.
+		 */
+		length = min_t(loff_t, length, 1024 * PAGE_SIZE);
+		if (xfs_get_extsz_hint(ip)) {
+			/*
+			 * xfs_iomap_write_direct() expects the shared lock. It
+			 * is unlocked on return.
+			 */
+			xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+			error = xfs_iomap_write_direct(ip, offset, length, &imap,
+					nimaps);
+		} else {
+			error = xfs_iomap_write_delay(ip, offset, length, &imap);
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		}
+
+		if (error)
+			return error;
+
+		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
+		xfs_bmbt_to_iomap(ip, iomap, &imap);
+	} else if (nimaps) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		trace_xfs_iomap_found(ip, offset, length, 0, &imap);
+		xfs_bmbt_to_iomap(ip, iomap, &imap);
+	} else {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		trace_xfs_iomap_not_found(ip, offset, length, 0, &imap);
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->type = IOMAP_HOLE;
+		iomap->offset = offset;
+		iomap->length = length;
+	}
+
+	return 0;
+}
+
+static int
+xfs_file_iomap_end_delalloc(
+	struct xfs_inode	*ip,
+	loff_t			offset,
+	loff_t			length,
+	ssize_t			written)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		start_fsb;
+	xfs_fileoff_t		end_fsb;
+	int			error = 0;
+
+	start_fsb = XFS_B_TO_FSB(mp, offset + written);
+	end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+	/*
+	 * Trim back delalloc blocks if we didn't manage to write the whole
+	 * range reserved.
+	 *
+	 * We don't need to care about racing delalloc as we hold i_mutex
+	 * across the reserve/allocate/unreserve calls. If there are delalloc
+	 * blocks in the range, they are ours.
+	 */
+	if (start_fsb < end_fsb) {
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+					       end_fsb - start_fsb);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+		if (error && !XFS_FORCED_SHUTDOWN(mp)) {
+			xfs_alert(mp, "%s: unable to clean up ino %lld",
+				__func__, ip->i_ino);
+			return error;
+		}
+	}
+
+	return 0;
+}
+
+static int
+xfs_file_iomap_end(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			length,
+	ssize_t			written,
+	unsigned		flags,
+	struct iomap		*iomap)
+{
+	if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
+		return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
+				length, written);
+	return 0;
+}
+
+struct iomap_ops xfs_iomap_ops = {
+	.iomap_begin		= xfs_file_iomap_begin,
+	.iomap_end		= xfs_file_iomap_end,
+};
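
To see how the two callbacks added above cooperate: the generic helpers
introduced earlier in this series (fs/iomap.c) drive them in a
begin/actor/end pattern, roughly as sketched below. This is a simplified
reconstruction, not the verbatim helper; error handling is abbreviated, and
iomap_actor_t stands for the generic actor callback type.

/*
 * Simplified sketch of how the generic iomap code drives the ops added
 * above (abbreviated from the shape of the fs/iomap.c helpers; not the
 * verbatim source).
 */
static loff_t
iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
	struct iomap_ops *ops, void *data, iomap_actor_t actor)
{
	struct iomap iomap = { 0 };
	loff_t written = 0, ret;

	/* Map or reserve the range; for XFS this is xfs_file_iomap_begin(). */
	ret = ops->iomap_begin(inode, pos, length, flags, &iomap);
	if (ret)
		return ret;

	/* Do the actual work (copy in data, zero pages, ...) on the mapping. */
	written = actor(inode, pos, min_t(loff_t, length, iomap.length),
			data, &iomap);

	/*
	 * Tell the filesystem how much was actually written so it can
	 * unreserve the rest; for XFS this punches out unused delalloc
	 * blocks via xfs_file_iomap_end_delalloc().
	 */
	if (ops->iomap_end)
		ret = ops->iomap_end(inode, pos, length,
				written > 0 ? written : 0, flags, &iomap);

	return written ? written : ret;
}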
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,7 +18,8 @@
 #ifndef __XFS_IOMAP_H__
 #define __XFS_IOMAP_H__
 
-struct iomap;
+#include <linux/iomap.h>
+
 struct xfs_inode;
 struct xfs_bmbt_irec;
 
@@ -33,4 +34,6 @@ int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
 		struct xfs_bmbt_irec *);
 
+extern struct iomap_ops xfs_iomap_ops;
+
 #endif /* __XFS_IOMAP_H__*/
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -38,6 +38,7 @@
 #include "xfs_dir2.h"
 #include "xfs_trans_space.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"
 
 #include <linux/capability.h>
 #include <linux/xattr.h>
@@ -822,8 +823,8 @@ xfs_setattr_size(
 			error = dax_truncate_page(inode, newsize,
 					xfs_get_blocks_direct);
 		} else {
-			error = block_truncate_page(inode->i_mapping, newsize,
-					xfs_get_blocks);
+			error = iomap_truncate_page(inode, newsize,
+					&did_zeroing, &xfs_iomap_ops);
 		}
 	}
 
@@ -838,8 +839,8 @@ xfs_setattr_size(
 	 * problem. Note that this includes any block zeroing we did above;
 	 * otherwise those blocks may not be zeroed after a crash.
 	 */
-	if (newsize > ip->i_d.di_size &&
-	    (oldsize != ip->i_d.di_size || did_zeroing)) {
+	if (did_zeroing ||
+	    (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
 		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
 						      ip->i_d.di_size, newsize);
 		if (error)
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1295,6 +1295,9 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
+DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
+DEFINE_IOMAP_EVENT(xfs_iomap_found);
+DEFINE_IOMAP_EVENT(xfs_iomap_not_found);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),