mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	fs: introduce write_begin, write_end, and perform_write aops
These are intended to replace prepare_write and commit_write with more flexible alternatives that are also able to avoid the buffered write deadlock problems efficiently (which prepare_write is unable to do). [mark.fasheh@oracle.com: API design contributions, code review and fixes] [akpm@linux-foundation.org: various fixes] [dmonakhov@sw.ru: new aop block_write_begin fix] Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com> Signed-off-by: Dmitriy Monakhov <dmonakhov@openvz.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									637aff46f9
								
							
						
					
					
						commit
						afddba49d1
					
				
					 11 changed files with 575 additions and 206 deletions
				
			
		| 
						 | 
					@ -178,15 +178,18 @@ prototypes:
 | 
				
			||||||
locking rules:
 | 
					locking rules:
 | 
				
			||||||
	All except set_page_dirty may block
 | 
						All except set_page_dirty may block
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			BKL	PageLocked(page)
 | 
								BKL	PageLocked(page)	i_mutex
 | 
				
			||||||
writepage:		no	yes, unlocks (see below)
 | 
					writepage:		no	yes, unlocks (see below)
 | 
				
			||||||
readpage:		no	yes, unlocks
 | 
					readpage:		no	yes, unlocks
 | 
				
			||||||
sync_page:		no	maybe
 | 
					sync_page:		no	maybe
 | 
				
			||||||
writepages:		no
 | 
					writepages:		no
 | 
				
			||||||
set_page_dirty		no	no
 | 
					set_page_dirty		no	no
 | 
				
			||||||
readpages:		no
 | 
					readpages:		no
 | 
				
			||||||
prepare_write:		no	yes
 | 
					prepare_write:		no	yes			yes
 | 
				
			||||||
commit_write:		no	yes
 | 
					commit_write:		no	yes			yes
 | 
				
			||||||
 | 
					write_begin:		no	locks the page		yes
 | 
				
			||||||
 | 
					write_end:		no	yes, unlocks		yes
 | 
				
			||||||
 | 
					perform_write:		no	n/a			yes
 | 
				
			||||||
bmap:			yes
 | 
					bmap:			yes
 | 
				
			||||||
invalidatepage:		no	yes
 | 
					invalidatepage:		no	yes
 | 
				
			||||||
releasepage:		no	yes
 | 
					releasepage:		no	yes
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -537,6 +537,12 @@ struct address_space_operations {
 | 
				
			||||||
			struct list_head *pages, unsigned nr_pages);
 | 
								struct list_head *pages, unsigned nr_pages);
 | 
				
			||||||
	int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
 | 
						int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
 | 
				
			||||||
	int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
 | 
						int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
 | 
				
			||||||
 | 
						int (*write_begin)(struct file *, struct address_space *mapping,
 | 
				
			||||||
 | 
									loff_t pos, unsigned len, unsigned flags,
 | 
				
			||||||
 | 
									struct page **pagep, void **fsdata);
 | 
				
			||||||
 | 
						int (*write_end)(struct file *, struct address_space *mapping,
 | 
				
			||||||
 | 
									loff_t pos, unsigned len, unsigned copied,
 | 
				
			||||||
 | 
									struct page *page, void *fsdata);
 | 
				
			||||||
	sector_t (*bmap)(struct address_space *, sector_t);
 | 
						sector_t (*bmap)(struct address_space *, sector_t);
 | 
				
			||||||
	int (*invalidatepage) (struct page *, unsigned long);
 | 
						int (*invalidatepage) (struct page *, unsigned long);
 | 
				
			||||||
	int (*releasepage) (struct page *, int);
 | 
						int (*releasepage) (struct page *, int);
 | 
				
			||||||
| 
						 | 
					@ -633,6 +639,45 @@ struct address_space_operations {
 | 
				
			||||||
        operations.  It should avoid returning an error if possible -
 | 
					        operations.  It should avoid returning an error if possible -
 | 
				
			||||||
        errors should have been handled by prepare_write.
 | 
					        errors should have been handled by prepare_write.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  write_begin: This is intended as a replacement for prepare_write. The
 | 
				
			||||||
 | 
						key differences being that:
 | 
				
			||||||
 | 
							- it returns a locked page (in *pagep) rather than being
 | 
				
			||||||
 | 
							  given a pre-locked page;
 | 
				
			||||||
 | 
							- it must be able to cope with short writes (where the
 | 
				
			||||||
 | 
							  length passed to write_begin is greater than the number
 | 
				
			||||||
 | 
							  of bytes copied into the page).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						Called by the generic buffered write code to ask the filesystem to
 | 
				
			||||||
 | 
						prepare to write len bytes at the given offset in the file. The
 | 
				
			||||||
 | 
						address_space should check that the write will be able to complete,
 | 
				
			||||||
 | 
						by allocating space if necessary and doing any other internal
 | 
				
			||||||
 | 
						housekeeping.  If the write will update parts of any basic-blocks on
 | 
				
			||||||
 | 
						storage, then those blocks should be pre-read (if they haven't been
 | 
				
			||||||
 | 
						read already) so that the updated blocks can be written out properly.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        The filesystem must return the locked pagecache page for the specified
 | 
				
			||||||
 | 
						offset, in *pagep, for the caller to write into.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						flags is a field for AOP_FLAG_xxx flags, described in
 | 
				
			||||||
 | 
						include/linux/fs.h.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        A void * may be returned in fsdata, which then gets passed into
 | 
				
			||||||
 | 
					        write_end.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        Returns 0 on success; < 0 on failure (which is the error code), in
 | 
				
			||||||
 | 
						which case write_end is not called.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  write_end: After a successful write_begin, and data copy, write_end must
 | 
				
			||||||
 | 
					        be called. len is the original len passed to write_begin, and copied
 | 
				
			||||||
 | 
					        is the amount that was able to be copied (copied == len is always true
 | 
				
			||||||
 | 
						if write_begin was called with the AOP_FLAG_UNINTERRUPTIBLE flag).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        The filesystem must take care of unlocking the page and releasing its
 | 
				
			||||||
 | 
					        refcount, and updating i_size.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        Returns < 0 on failure, otherwise the number of bytes (<= 'copied')
 | 
				
			||||||
 | 
					        that were able to be copied into pagecache.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  bmap: called by the VFS to map a logical block offset within object to
 | 
					  bmap: called by the VFS to map a logical block offset within object to
 | 
				
			||||||
  	physical block number. This method is used by the FIBMAP
 | 
					  	physical block number. This method is used by the FIBMAP
 | 
				
			||||||
  	ioctl and for working with swap-files.  To be able to swap to
 | 
					  	ioctl and for working with swap-files.  To be able to swap to
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -204,14 +204,13 @@ lo_do_transfer(struct loop_device *lo, int cmd,
 | 
				
			||||||
 * do_lo_send_aops - helper for writing data to a loop device
 | 
					 * do_lo_send_aops - helper for writing data to a loop device
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * This is the fast version for backing filesystems which implement the address
 | 
					 * This is the fast version for backing filesystems which implement the address
 | 
				
			||||||
 * space operations prepare_write and commit_write.
 | 
					 * space operations write_begin and write_end.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
 | 
					static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
 | 
				
			||||||
		int bsize, loff_t pos, struct page *page)
 | 
							int bsize, loff_t pos, struct page *unused)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
 | 
						struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
 | 
				
			||||||
	struct address_space *mapping = file->f_mapping;
 | 
						struct address_space *mapping = file->f_mapping;
 | 
				
			||||||
	const struct address_space_operations *aops = mapping->a_ops;
 | 
					 | 
				
			||||||
	pgoff_t index;
 | 
						pgoff_t index;
 | 
				
			||||||
	unsigned offset, bv_offs;
 | 
						unsigned offset, bv_offs;
 | 
				
			||||||
	int len, ret;
 | 
						int len, ret;
 | 
				
			||||||
| 
						 | 
					@ -223,63 +222,47 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
 | 
				
			||||||
	len = bvec->bv_len;
 | 
						len = bvec->bv_len;
 | 
				
			||||||
	while (len > 0) {
 | 
						while (len > 0) {
 | 
				
			||||||
		sector_t IV;
 | 
							sector_t IV;
 | 
				
			||||||
		unsigned size;
 | 
							unsigned size, copied;
 | 
				
			||||||
		int transfer_result;
 | 
							int transfer_result;
 | 
				
			||||||
 | 
							struct page *page;
 | 
				
			||||||
 | 
							void *fsdata;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
 | 
							IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
 | 
				
			||||||
		size = PAGE_CACHE_SIZE - offset;
 | 
							size = PAGE_CACHE_SIZE - offset;
 | 
				
			||||||
		if (size > len)
 | 
							if (size > len)
 | 
				
			||||||
			size = len;
 | 
								size = len;
 | 
				
			||||||
		page = grab_cache_page(mapping, index);
 | 
					
 | 
				
			||||||
		if (unlikely(!page))
 | 
							ret = pagecache_write_begin(file, mapping, pos, size, 0,
 | 
				
			||||||
 | 
												&page, &fsdata);
 | 
				
			||||||
 | 
							if (ret)
 | 
				
			||||||
			goto fail;
 | 
								goto fail;
 | 
				
			||||||
		ret = aops->prepare_write(file, page, offset,
 | 
					
 | 
				
			||||||
					  offset + size);
 | 
					 | 
				
			||||||
		if (unlikely(ret)) {
 | 
					 | 
				
			||||||
			if (ret == AOP_TRUNCATED_PAGE) {
 | 
					 | 
				
			||||||
				page_cache_release(page);
 | 
					 | 
				
			||||||
				continue;
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			goto unlock;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		transfer_result = lo_do_transfer(lo, WRITE, page, offset,
 | 
							transfer_result = lo_do_transfer(lo, WRITE, page, offset,
 | 
				
			||||||
				bvec->bv_page, bv_offs, size, IV);
 | 
									bvec->bv_page, bv_offs, size, IV);
 | 
				
			||||||
		if (unlikely(transfer_result)) {
 | 
							copied = size;
 | 
				
			||||||
			/*
 | 
					 | 
				
			||||||
			 * The transfer failed, but we still write the data to
 | 
					 | 
				
			||||||
			 * keep prepare/commit calls balanced.
 | 
					 | 
				
			||||||
			 */
 | 
					 | 
				
			||||||
			printk(KERN_ERR "loop: transfer error block %llu\n",
 | 
					 | 
				
			||||||
			       (unsigned long long)index);
 | 
					 | 
				
			||||||
			zero_user_page(page, offset, size, KM_USER0);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		flush_dcache_page(page);
 | 
					 | 
				
			||||||
		ret = aops->commit_write(file, page, offset,
 | 
					 | 
				
			||||||
					 offset + size);
 | 
					 | 
				
			||||||
		if (unlikely(ret)) {
 | 
					 | 
				
			||||||
			if (ret == AOP_TRUNCATED_PAGE) {
 | 
					 | 
				
			||||||
				page_cache_release(page);
 | 
					 | 
				
			||||||
				continue;
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			goto unlock;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		if (unlikely(transfer_result))
 | 
							if (unlikely(transfer_result))
 | 
				
			||||||
			goto unlock;
 | 
								copied = 0;
 | 
				
			||||||
		bv_offs += size;
 | 
					
 | 
				
			||||||
		len -= size;
 | 
							ret = pagecache_write_end(file, mapping, pos, size, copied,
 | 
				
			||||||
 | 
												page, fsdata);
 | 
				
			||||||
 | 
							if (ret < 0)
 | 
				
			||||||
 | 
								goto fail;
 | 
				
			||||||
 | 
							if (ret < copied)
 | 
				
			||||||
 | 
								copied = ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (unlikely(transfer_result))
 | 
				
			||||||
 | 
								goto fail;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							bv_offs += copied;
 | 
				
			||||||
 | 
							len -= copied;
 | 
				
			||||||
		offset = 0;
 | 
							offset = 0;
 | 
				
			||||||
		index++;
 | 
							index++;
 | 
				
			||||||
		pos += size;
 | 
							pos += copied;
 | 
				
			||||||
		unlock_page(page);
 | 
					 | 
				
			||||||
		page_cache_release(page);
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	ret = 0;
 | 
						ret = 0;
 | 
				
			||||||
out:
 | 
					out:
 | 
				
			||||||
	mutex_unlock(&mapping->host->i_mutex);
 | 
						mutex_unlock(&mapping->host->i_mutex);
 | 
				
			||||||
	return ret;
 | 
						return ret;
 | 
				
			||||||
unlock:
 | 
					 | 
				
			||||||
	unlock_page(page);
 | 
					 | 
				
			||||||
	page_cache_release(page);
 | 
					 | 
				
			||||||
fail:
 | 
					fail:
 | 
				
			||||||
	ret = -1;
 | 
						ret = -1;
 | 
				
			||||||
	goto out;
 | 
						goto out;
 | 
				
			||||||
| 
						 | 
					@ -313,7 +296,7 @@ static int __do_lo_send_write(struct file *file,
 | 
				
			||||||
 * do_lo_send_direct_write - helper for writing data to a loop device
 | 
					 * do_lo_send_direct_write - helper for writing data to a loop device
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * This is the fast, non-transforming version for backing filesystems which do
 | 
					 * This is the fast, non-transforming version for backing filesystems which do
 | 
				
			||||||
 * not implement the address space operations prepare_write and commit_write.
 | 
					 * not implement the address space operations write_begin and write_end.
 | 
				
			||||||
 * It uses the write file operation which should be present on all writeable
 | 
					 * It uses the write file operation which should be present on all writeable
 | 
				
			||||||
 * filesystems.
 | 
					 * filesystems.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
| 
						 | 
					@ -332,7 +315,7 @@ static int do_lo_send_direct_write(struct loop_device *lo,
 | 
				
			||||||
 * do_lo_send_write - helper for writing data to a loop device
 | 
					 * do_lo_send_write - helper for writing data to a loop device
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * This is the slow, transforming version for filesystems which do not
 | 
					 * This is the slow, transforming version for filesystems which do not
 | 
				
			||||||
 * implement the address space operations prepare_write and commit_write.  It
 | 
					 * implement the address space operations write_begin and write_end.  It
 | 
				
			||||||
 * uses the write file operation which should be present on all writeable
 | 
					 * uses the write file operation which should be present on all writeable
 | 
				
			||||||
 * filesystems.
 | 
					 * filesystems.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
| 
						 | 
					@ -780,7 +763,7 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		if (!file->f_op->splice_read)
 | 
							if (!file->f_op->splice_read)
 | 
				
			||||||
			goto out_putf;
 | 
								goto out_putf;
 | 
				
			||||||
		if (aops->prepare_write && aops->commit_write)
 | 
							if (aops->prepare_write || aops->write_begin)
 | 
				
			||||||
			lo_flags |= LO_FLAGS_USE_AOPS;
 | 
								lo_flags |= LO_FLAGS_USE_AOPS;
 | 
				
			||||||
		if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
 | 
							if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
 | 
				
			||||||
			lo_flags |= LO_FLAGS_READ_ONLY;
 | 
								lo_flags |= LO_FLAGS_READ_ONLY;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										201
									
								
								fs/buffer.c
									
									
									
									
									
								
							
							
						
						
									
										201
									
								
								fs/buffer.c
									
									
									
									
									
								
							| 
						 | 
					@ -1770,6 +1770,48 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 | 
				
			||||||
	goto done;
 | 
						goto done;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * If a page has any new buffers, zero them out here, and mark them uptodate
 | 
				
			||||||
 | 
					 * and dirty so they'll be written out (in order to prevent uninitialised
 | 
				
			||||||
 | 
					 * block data from leaking). And clear the new bit.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned int block_start, block_end;
 | 
				
			||||||
 | 
						struct buffer_head *head, *bh;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						BUG_ON(!PageLocked(page));
 | 
				
			||||||
 | 
						if (!page_has_buffers(page))
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						bh = head = page_buffers(page);
 | 
				
			||||||
 | 
						block_start = 0;
 | 
				
			||||||
 | 
						do {
 | 
				
			||||||
 | 
							block_end = block_start + bh->b_size;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (buffer_new(bh)) {
 | 
				
			||||||
 | 
								if (block_end > from && block_start < to) {
 | 
				
			||||||
 | 
									if (!PageUptodate(page)) {
 | 
				
			||||||
 | 
										unsigned start, size;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
										start = max(from, block_start);
 | 
				
			||||||
 | 
										size = min(to, block_end) - start;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
										zero_user_page(page, start, size, KM_USER0);
 | 
				
			||||||
 | 
										set_buffer_uptodate(bh);
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									clear_buffer_new(bh);
 | 
				
			||||||
 | 
									mark_buffer_dirty(bh);
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							block_start = block_end;
 | 
				
			||||||
 | 
							bh = bh->b_this_page;
 | 
				
			||||||
 | 
						} while (bh != head);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(page_zero_new_buffers);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int __block_prepare_write(struct inode *inode, struct page *page,
 | 
					static int __block_prepare_write(struct inode *inode, struct page *page,
 | 
				
			||||||
		unsigned from, unsigned to, get_block_t *get_block)
 | 
							unsigned from, unsigned to, get_block_t *get_block)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -1854,38 +1896,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 | 
				
			||||||
		if (!buffer_uptodate(*wait_bh))
 | 
							if (!buffer_uptodate(*wait_bh))
 | 
				
			||||||
			err = -EIO;
 | 
								err = -EIO;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if (!err) {
 | 
						if (unlikely(err))
 | 
				
			||||||
		bh = head;
 | 
							page_zero_new_buffers(page, from, to);
 | 
				
			||||||
		do {
 | 
					 | 
				
			||||||
			if (buffer_new(bh))
 | 
					 | 
				
			||||||
				clear_buffer_new(bh);
 | 
					 | 
				
			||||||
		} while ((bh = bh->b_this_page) != head);
 | 
					 | 
				
			||||||
		return 0;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	/* Error case: */
 | 
					 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * Zero out any newly allocated blocks to avoid exposing stale
 | 
					 | 
				
			||||||
	 * data.  If BH_New is set, we know that the block was newly
 | 
					 | 
				
			||||||
	 * allocated in the above loop.
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	bh = head;
 | 
					 | 
				
			||||||
	block_start = 0;
 | 
					 | 
				
			||||||
	do {
 | 
					 | 
				
			||||||
		block_end = block_start+blocksize;
 | 
					 | 
				
			||||||
		if (block_end <= from)
 | 
					 | 
				
			||||||
			goto next_bh;
 | 
					 | 
				
			||||||
		if (block_start >= to)
 | 
					 | 
				
			||||||
			break;
 | 
					 | 
				
			||||||
		if (buffer_new(bh)) {
 | 
					 | 
				
			||||||
			clear_buffer_new(bh);
 | 
					 | 
				
			||||||
			zero_user_page(page, block_start, bh->b_size, KM_USER0);
 | 
					 | 
				
			||||||
			set_buffer_uptodate(bh);
 | 
					 | 
				
			||||||
			mark_buffer_dirty(bh);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
next_bh:
 | 
					 | 
				
			||||||
		block_start = block_end;
 | 
					 | 
				
			||||||
		bh = bh->b_this_page;
 | 
					 | 
				
			||||||
	} while (bh != head);
 | 
					 | 
				
			||||||
	return err;
 | 
						return err;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1910,6 +1922,7 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 | 
				
			||||||
			set_buffer_uptodate(bh);
 | 
								set_buffer_uptodate(bh);
 | 
				
			||||||
			mark_buffer_dirty(bh);
 | 
								mark_buffer_dirty(bh);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
							clear_buffer_new(bh);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
| 
						 | 
					@ -1923,6 +1936,130 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * block_write_begin takes care of the basic task of block allocation and
 | 
				
			||||||
 | 
					 * bringing partial write blocks uptodate first.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * If *pagep is not NULL, then block_write_begin uses the locked page
 | 
				
			||||||
 | 
					 * at *pagep rather than allocating its own. In this case, the page will
 | 
				
			||||||
 | 
					 * not be unlocked or deallocated on failure.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					int block_write_begin(struct file *file, struct address_space *mapping,
 | 
				
			||||||
 | 
								loff_t pos, unsigned len, unsigned flags,
 | 
				
			||||||
 | 
								struct page **pagep, void **fsdata,
 | 
				
			||||||
 | 
								get_block_t *get_block)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct inode *inode = mapping->host;
 | 
				
			||||||
 | 
						int status = 0;
 | 
				
			||||||
 | 
						struct page *page;
 | 
				
			||||||
 | 
						pgoff_t index;
 | 
				
			||||||
 | 
						unsigned start, end;
 | 
				
			||||||
 | 
						int ownpage = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						index = pos >> PAGE_CACHE_SHIFT;
 | 
				
			||||||
 | 
						start = pos & (PAGE_CACHE_SIZE - 1);
 | 
				
			||||||
 | 
						end = start + len;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						page = *pagep;
 | 
				
			||||||
 | 
						if (page == NULL) {
 | 
				
			||||||
 | 
							ownpage = 1;
 | 
				
			||||||
 | 
							page = __grab_cache_page(mapping, index);
 | 
				
			||||||
 | 
							if (!page) {
 | 
				
			||||||
 | 
								status = -ENOMEM;
 | 
				
			||||||
 | 
								goto out;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							*pagep = page;
 | 
				
			||||||
 | 
						} else
 | 
				
			||||||
 | 
							BUG_ON(!PageLocked(page));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						status = __block_prepare_write(inode, page, start, end, get_block);
 | 
				
			||||||
 | 
						if (unlikely(status)) {
 | 
				
			||||||
 | 
							ClearPageUptodate(page);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (ownpage) {
 | 
				
			||||||
 | 
								unlock_page(page);
 | 
				
			||||||
 | 
								page_cache_release(page);
 | 
				
			||||||
 | 
								*pagep = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								/*
 | 
				
			||||||
 | 
								 * prepare_write() may have instantiated a few blocks
 | 
				
			||||||
 | 
								 * outside i_size.  Trim these off again. Don't need
 | 
				
			||||||
 | 
								 * i_size_read because we hold i_mutex.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								if (pos + len > inode->i_size)
 | 
				
			||||||
 | 
									vmtruncate(inode, inode->i_size);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						return status;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(block_write_begin);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int block_write_end(struct file *file, struct address_space *mapping,
 | 
				
			||||||
 | 
								loff_t pos, unsigned len, unsigned copied,
 | 
				
			||||||
 | 
								struct page *page, void *fsdata)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct inode *inode = mapping->host;
 | 
				
			||||||
 | 
						unsigned start;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						start = pos & (PAGE_CACHE_SIZE - 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (unlikely(copied < len)) {
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * The buffers that were written will now be uptodate, so we
 | 
				
			||||||
 | 
							 * don't have to worry about a readpage reading them and
 | 
				
			||||||
 | 
							 * overwriting a partial write. However if we have encountered
 | 
				
			||||||
 | 
							 * a short write and only partially written into a buffer, it
 | 
				
			||||||
 | 
							 * will not be marked uptodate, so a readpage might come in and
 | 
				
			||||||
 | 
							 * destroy our partial write.
 | 
				
			||||||
 | 
							 *
 | 
				
			||||||
 | 
							 * Do the simplest thing, and just treat any short write to a
 | 
				
			||||||
 | 
							 * non uptodate page as a zero-length write, and force the
 | 
				
			||||||
 | 
							 * caller to redo the whole thing.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							if (!PageUptodate(page))
 | 
				
			||||||
 | 
								copied = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							page_zero_new_buffers(page, start+copied, start+len);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						flush_dcache_page(page);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* This could be a short (even 0-length) commit */
 | 
				
			||||||
 | 
						__block_commit_write(inode, page, start, start+copied);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return copied;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(block_write_end);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int generic_write_end(struct file *file, struct address_space *mapping,
 | 
				
			||||||
 | 
								loff_t pos, unsigned len, unsigned copied,
 | 
				
			||||||
 | 
								struct page *page, void *fsdata)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct inode *inode = mapping->host;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * No need to use i_size_read() here, the i_size
 | 
				
			||||||
 | 
						 * cannot change under us because we hold i_mutex.
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * But it's important to update i_size while still holding page lock:
 | 
				
			||||||
 | 
						 * page writeout could otherwise come in and zero beyond i_size.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (pos+copied > inode->i_size) {
 | 
				
			||||||
 | 
							i_size_write(inode, pos+copied);
 | 
				
			||||||
 | 
							mark_inode_dirty(inode);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						unlock_page(page);
 | 
				
			||||||
 | 
						page_cache_release(page);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return copied;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(generic_write_end);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Generic "read page" function for block devices that have the normal
 | 
					 * Generic "read page" function for block devices that have the normal
 | 
				
			||||||
 * get_block functionality. This is most of the block device filesystems.
 | 
					 * get_block functionality. This is most of the block device filesystems.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										44
									
								
								fs/libfs.c
									
									
									
									
									
								
							
							
						
						
									
										44
									
								
								fs/libfs.c
									
									
									
									
									
								
							| 
						 | 
					@ -351,6 +351,26 @@ int simple_prepare_write(struct file *file, struct page *page,
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int simple_write_begin(struct file *file, struct address_space *mapping,
 | 
				
			||||||
 | 
								loff_t pos, unsigned len, unsigned flags,
 | 
				
			||||||
 | 
								struct page **pagep, void **fsdata)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct page *page;
 | 
				
			||||||
 | 
						pgoff_t index;
 | 
				
			||||||
 | 
						unsigned from;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						index = pos >> PAGE_CACHE_SHIFT;
 | 
				
			||||||
 | 
						from = pos & (PAGE_CACHE_SIZE - 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						page = __grab_cache_page(mapping, index);
 | 
				
			||||||
 | 
						if (!page)
 | 
				
			||||||
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						*pagep = page;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return simple_prepare_write(file, page, from, from+len);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int simple_commit_write(struct file *file, struct page *page,
 | 
					int simple_commit_write(struct file *file, struct page *page,
 | 
				
			||||||
			unsigned from, unsigned to)
 | 
								unsigned from, unsigned to)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -369,6 +389,28 @@ int simple_commit_write(struct file *file, struct page *page,
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int simple_write_end(struct file *file, struct address_space *mapping,
 | 
				
			||||||
 | 
								loff_t pos, unsigned len, unsigned copied,
 | 
				
			||||||
 | 
								struct page *page, void *fsdata)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned from = pos & (PAGE_CACHE_SIZE - 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* zero the stale part of the page if we did a short copy */
 | 
				
			||||||
 | 
						if (copied < len) {
 | 
				
			||||||
 | 
							void *kaddr = kmap_atomic(page, KM_USER0);
 | 
				
			||||||
 | 
							memset(kaddr + from + copied, 0, len - copied);
 | 
				
			||||||
 | 
							flush_dcache_page(page);
 | 
				
			||||||
 | 
							kunmap_atomic(kaddr, KM_USER0);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						simple_commit_write(file, page, from, from+copied);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						unlock_page(page);
 | 
				
			||||||
 | 
						page_cache_release(page);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return copied;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * the inodes created here are not hashed. If you use iunique to generate
 | 
					 * the inodes created here are not hashed. If you use iunique to generate
 | 
				
			||||||
 * unique inode values later for this filesystem, then you must take care
 | 
					 * unique inode values later for this filesystem, then you must take care
 | 
				
			||||||
| 
						 | 
					@ -642,6 +684,8 @@ EXPORT_SYMBOL(dcache_dir_open);
 | 
				
			||||||
EXPORT_SYMBOL(dcache_readdir);
 | 
					EXPORT_SYMBOL(dcache_readdir);
 | 
				
			||||||
EXPORT_SYMBOL(generic_read_dir);
 | 
					EXPORT_SYMBOL(generic_read_dir);
 | 
				
			||||||
EXPORT_SYMBOL(get_sb_pseudo);
 | 
					EXPORT_SYMBOL(get_sb_pseudo);
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(simple_write_begin);
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(simple_write_end);
 | 
				
			||||||
EXPORT_SYMBOL(simple_commit_write);
 | 
					EXPORT_SYMBOL(simple_commit_write);
 | 
				
			||||||
EXPORT_SYMBOL(simple_dir_inode_operations);
 | 
					EXPORT_SYMBOL(simple_dir_inode_operations);
 | 
				
			||||||
EXPORT_SYMBOL(simple_dir_operations);
 | 
					EXPORT_SYMBOL(simple_dir_operations);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										46
									
								
								fs/namei.c
									
									
									
									
									
								
							
							
						
						
									
										46
									
								
								fs/namei.c
									
									
									
									
									
								
							| 
						 | 
					@ -2729,53 +2729,29 @@ int __page_symlink(struct inode *inode, const char *symname, int len,
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct address_space *mapping = inode->i_mapping;
 | 
						struct address_space *mapping = inode->i_mapping;
 | 
				
			||||||
	struct page *page;
 | 
						struct page *page;
 | 
				
			||||||
 | 
						void *fsdata;
 | 
				
			||||||
	int err;
 | 
						int err;
 | 
				
			||||||
	char *kaddr;
 | 
						char *kaddr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
retry:
 | 
					retry:
 | 
				
			||||||
	err = -ENOMEM;
 | 
						err = pagecache_write_begin(NULL, mapping, 0, len-1,
 | 
				
			||||||
	page = find_or_create_page(mapping, 0, gfp_mask);
 | 
									AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
 | 
				
			||||||
	if (!page)
 | 
					 | 
				
			||||||
		goto fail;
 | 
					 | 
				
			||||||
	err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
 | 
					 | 
				
			||||||
	if (err == AOP_TRUNCATED_PAGE) {
 | 
					 | 
				
			||||||
		page_cache_release(page);
 | 
					 | 
				
			||||||
		goto retry;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	if (err)
 | 
						if (err)
 | 
				
			||||||
		goto fail_map;
 | 
							goto fail;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	kaddr = kmap_atomic(page, KM_USER0);
 | 
						kaddr = kmap_atomic(page, KM_USER0);
 | 
				
			||||||
	memcpy(kaddr, symname, len-1);
 | 
						memcpy(kaddr, symname, len-1);
 | 
				
			||||||
	kunmap_atomic(kaddr, KM_USER0);
 | 
						kunmap_atomic(kaddr, KM_USER0);
 | 
				
			||||||
	err = mapping->a_ops->commit_write(NULL, page, 0, len-1);
 | 
					
 | 
				
			||||||
	if (err == AOP_TRUNCATED_PAGE) {
 | 
						err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
 | 
				
			||||||
		page_cache_release(page);
 | 
												page, fsdata);
 | 
				
			||||||
		goto retry;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	if (err)
 | 
					 | 
				
			||||||
		goto fail_map;
 | 
					 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * Notice that we are _not_ going to block here - end of page is
 | 
					 | 
				
			||||||
	 * unmapped, so this will only try to map the rest of page, see
 | 
					 | 
				
			||||||
	 * that it is unmapped (typically even will not look into inode -
 | 
					 | 
				
			||||||
	 * ->i_size will be enough for everything) and zero it out.
 | 
					 | 
				
			||||||
	 * OTOH it's obviously correct and should make the page up-to-date.
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	if (!PageUptodate(page)) {
 | 
					 | 
				
			||||||
		err = mapping->a_ops->readpage(NULL, page);
 | 
					 | 
				
			||||||
		if (err != AOP_TRUNCATED_PAGE)
 | 
					 | 
				
			||||||
			wait_on_page_locked(page);
 | 
					 | 
				
			||||||
	} else {
 | 
					 | 
				
			||||||
		unlock_page(page);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	page_cache_release(page);
 | 
					 | 
				
			||||||
	if (err < 0)
 | 
						if (err < 0)
 | 
				
			||||||
		goto fail;
 | 
							goto fail;
 | 
				
			||||||
 | 
						if (err < len-1)
 | 
				
			||||||
 | 
							goto retry;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mark_inode_dirty(inode);
 | 
						mark_inode_dirty(inode);
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
fail_map:
 | 
					 | 
				
			||||||
	unlock_page(page);
 | 
					 | 
				
			||||||
	page_cache_release(page);
 | 
					 | 
				
			||||||
fail:
 | 
					fail:
 | 
				
			||||||
	return err;
 | 
						return err;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										69
									
								
								fs/splice.c
									
									
									
									
									
								
							
							
						
						
									
										69
									
								
								fs/splice.c
									
									
									
									
									
								
							| 
						 | 
					@ -563,7 +563,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 | 
				
			||||||
	struct address_space *mapping = file->f_mapping;
 | 
						struct address_space *mapping = file->f_mapping;
 | 
				
			||||||
	unsigned int offset, this_len;
 | 
						unsigned int offset, this_len;
 | 
				
			||||||
	struct page *page;
 | 
						struct page *page;
 | 
				
			||||||
	pgoff_t index;
 | 
						void *fsdata;
 | 
				
			||||||
	int ret;
 | 
						int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
| 
						 | 
					@ -573,49 +573,16 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 | 
				
			||||||
	if (unlikely(ret))
 | 
						if (unlikely(ret))
 | 
				
			||||||
		return ret;
 | 
							return ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	index = sd->pos >> PAGE_CACHE_SHIFT;
 | 
					 | 
				
			||||||
	offset = sd->pos & ~PAGE_CACHE_MASK;
 | 
						offset = sd->pos & ~PAGE_CACHE_MASK;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	this_len = sd->len;
 | 
						this_len = sd->len;
 | 
				
			||||||
	if (this_len + offset > PAGE_CACHE_SIZE)
 | 
						if (this_len + offset > PAGE_CACHE_SIZE)
 | 
				
			||||||
		this_len = PAGE_CACHE_SIZE - offset;
 | 
							this_len = PAGE_CACHE_SIZE - offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
find_page:
 | 
						ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
 | 
				
			||||||
	page = find_lock_page(mapping, index);
 | 
									AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
 | 
				
			||||||
	if (!page) {
 | 
						if (unlikely(ret))
 | 
				
			||||||
		ret = -ENOMEM;
 | 
							goto out;
 | 
				
			||||||
		page = page_cache_alloc_cold(mapping);
 | 
					 | 
				
			||||||
		if (unlikely(!page))
 | 
					 | 
				
			||||||
			goto out_ret;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		/*
 | 
					 | 
				
			||||||
		 * This will also lock the page
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
		ret = add_to_page_cache_lru(page, mapping, index,
 | 
					 | 
				
			||||||
					    GFP_KERNEL);
 | 
					 | 
				
			||||||
		if (unlikely(ret))
 | 
					 | 
				
			||||||
			goto out_release;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
 | 
					 | 
				
			||||||
	if (unlikely(ret)) {
 | 
					 | 
				
			||||||
		loff_t isize = i_size_read(mapping->host);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		if (ret != AOP_TRUNCATED_PAGE)
 | 
					 | 
				
			||||||
			unlock_page(page);
 | 
					 | 
				
			||||||
		page_cache_release(page);
 | 
					 | 
				
			||||||
		if (ret == AOP_TRUNCATED_PAGE)
 | 
					 | 
				
			||||||
			goto find_page;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		/*
 | 
					 | 
				
			||||||
		 * prepare_write() may have instantiated a few blocks
 | 
					 | 
				
			||||||
		 * outside i_size.  Trim these off again.
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
		if (sd->pos + this_len > isize)
 | 
					 | 
				
			||||||
			vmtruncate(mapping->host, isize);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		goto out_ret;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (buf->page != page) {
 | 
						if (buf->page != page) {
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
| 
						 | 
					@ -629,31 +596,9 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 | 
				
			||||||
		kunmap_atomic(dst, KM_USER1);
 | 
							kunmap_atomic(dst, KM_USER1);
 | 
				
			||||||
		buf->ops->unmap(pipe, buf, src);
 | 
							buf->ops->unmap(pipe, buf, src);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
 | 
				
			||||||
	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
 | 
									page, fsdata);
 | 
				
			||||||
	if (ret) {
 | 
					 | 
				
			||||||
		if (ret == AOP_TRUNCATED_PAGE) {
 | 
					 | 
				
			||||||
			page_cache_release(page);
 | 
					 | 
				
			||||||
			goto find_page;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		if (ret < 0)
 | 
					 | 
				
			||||||
			goto out;
 | 
					 | 
				
			||||||
		/*
 | 
					 | 
				
			||||||
		 * Partial write has happened, so 'ret' already initialized by
 | 
					 | 
				
			||||||
		 * number of bytes written, Where is nothing we have to do here.
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
	} else
 | 
					 | 
				
			||||||
		ret = this_len;
 | 
					 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * Return the number of bytes written and mark page as
 | 
					 | 
				
			||||||
	 * accessed, we are now done!
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	mark_page_accessed(page);
 | 
					 | 
				
			||||||
out:
 | 
					out:
 | 
				
			||||||
	unlock_page(page);
 | 
					 | 
				
			||||||
out_release:
 | 
					 | 
				
			||||||
	page_cache_release(page);
 | 
					 | 
				
			||||||
out_ret:
 | 
					 | 
				
			||||||
	return ret;
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -203,6 +203,16 @@ void block_invalidatepage(struct page *page, unsigned long offset);
 | 
				
			||||||
int block_write_full_page(struct page *page, get_block_t *get_block,
 | 
					int block_write_full_page(struct page *page, get_block_t *get_block,
 | 
				
			||||||
				struct writeback_control *wbc);
 | 
									struct writeback_control *wbc);
 | 
				
			||||||
int block_read_full_page(struct page*, get_block_t*);
 | 
					int block_read_full_page(struct page*, get_block_t*);
 | 
				
			||||||
 | 
					int block_write_begin(struct file *, struct address_space *,
 | 
				
			||||||
 | 
									loff_t, unsigned, unsigned,
 | 
				
			||||||
 | 
									struct page **, void **, get_block_t*);
 | 
				
			||||||
 | 
					int block_write_end(struct file *, struct address_space *,
 | 
				
			||||||
 | 
									loff_t, unsigned, unsigned,
 | 
				
			||||||
 | 
									struct page *, void *);
 | 
				
			||||||
 | 
					int generic_write_end(struct file *, struct address_space *,
 | 
				
			||||||
 | 
									loff_t, unsigned, unsigned,
 | 
				
			||||||
 | 
									struct page *, void *);
 | 
				
			||||||
 | 
					void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
 | 
				
			||||||
int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
 | 
					int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
 | 
				
			||||||
int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
 | 
					int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
 | 
				
			||||||
				loff_t *);
 | 
									loff_t *);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -394,6 +394,8 @@ enum positive_aop_returns {
 | 
				
			||||||
	AOP_TRUNCATED_PAGE	= 0x80001,
 | 
						AOP_TRUNCATED_PAGE	= 0x80001,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define AOP_FLAG_UNINTERRUPTIBLE	0x0001 /* will not do a short write */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * oh the beauties of C type declarations.
 | 
					 * oh the beauties of C type declarations.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
| 
						 | 
					@ -413,7 +415,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 | 
				
			||||||
size_t iov_iter_copy_from_user(struct page *page,
 | 
					size_t iov_iter_copy_from_user(struct page *page,
 | 
				
			||||||
		struct iov_iter *i, unsigned long offset, size_t bytes);
 | 
							struct iov_iter *i, unsigned long offset, size_t bytes);
 | 
				
			||||||
void iov_iter_advance(struct iov_iter *i, size_t bytes);
 | 
					void iov_iter_advance(struct iov_iter *i, size_t bytes);
 | 
				
			||||||
int iov_iter_fault_in_readable(struct iov_iter *i);
 | 
					int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
 | 
				
			||||||
size_t iov_iter_single_seg_count(struct iov_iter *i);
 | 
					size_t iov_iter_single_seg_count(struct iov_iter *i);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void iov_iter_init(struct iov_iter *i,
 | 
					static inline void iov_iter_init(struct iov_iter *i,
 | 
				
			||||||
| 
						 | 
					@ -454,6 +456,14 @@ struct address_space_operations {
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
 | 
						int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
 | 
				
			||||||
	int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
 | 
						int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						int (*write_begin)(struct file *, struct address_space *mapping,
 | 
				
			||||||
 | 
									loff_t pos, unsigned len, unsigned flags,
 | 
				
			||||||
 | 
									struct page **pagep, void **fsdata);
 | 
				
			||||||
 | 
						int (*write_end)(struct file *, struct address_space *mapping,
 | 
				
			||||||
 | 
									loff_t pos, unsigned len, unsigned copied,
 | 
				
			||||||
 | 
									struct page *page, void *fsdata);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
 | 
						/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
 | 
				
			||||||
	sector_t (*bmap)(struct address_space *, sector_t);
 | 
						sector_t (*bmap)(struct address_space *, sector_t);
 | 
				
			||||||
	void (*invalidatepage) (struct page *, unsigned long);
 | 
						void (*invalidatepage) (struct page *, unsigned long);
 | 
				
			||||||
| 
						 | 
					@ -468,6 +478,18 @@ struct address_space_operations {
 | 
				
			||||||
	int (*launder_page) (struct page *);
 | 
						int (*launder_page) (struct page *);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * pagecache_write_begin/pagecache_write_end must be used by general code
 | 
				
			||||||
 | 
					 * to write into the pagecache.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					int pagecache_write_begin(struct file *, struct address_space *mapping,
 | 
				
			||||||
 | 
									loff_t pos, unsigned len, unsigned flags,
 | 
				
			||||||
 | 
									struct page **pagep, void **fsdata);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int pagecache_write_end(struct file *, struct address_space *mapping,
 | 
				
			||||||
 | 
									loff_t pos, unsigned len, unsigned copied,
 | 
				
			||||||
 | 
									struct page *page, void *fsdata);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct backing_dev_info;
 | 
					struct backing_dev_info;
 | 
				
			||||||
struct address_space {
 | 
					struct address_space {
 | 
				
			||||||
	struct inode		*host;		/* owner: inode, block_device */
 | 
						struct inode		*host;		/* owner: inode, block_device */
 | 
				
			||||||
| 
						 | 
					@ -1866,6 +1888,12 @@ extern int simple_prepare_write(struct file *file, struct page *page,
 | 
				
			||||||
			unsigned offset, unsigned to);
 | 
								unsigned offset, unsigned to);
 | 
				
			||||||
extern int simple_commit_write(struct file *file, struct page *page,
 | 
					extern int simple_commit_write(struct file *file, struct page *page,
 | 
				
			||||||
				unsigned offset, unsigned to);
 | 
									unsigned offset, unsigned to);
 | 
				
			||||||
 | 
					extern int simple_write_begin(struct file *file, struct address_space *mapping,
 | 
				
			||||||
 | 
								loff_t pos, unsigned len, unsigned flags,
 | 
				
			||||||
 | 
								struct page **pagep, void **fsdata);
 | 
				
			||||||
 | 
					extern int simple_write_end(struct file *file, struct address_space *mapping,
 | 
				
			||||||
 | 
								loff_t pos, unsigned len, unsigned copied,
 | 
				
			||||||
 | 
								struct page *page, void *fsdata);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *);
 | 
					extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *);
 | 
				
			||||||
extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
 | 
					extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -96,6 +96,8 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
 | 
				
			||||||
unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 | 
					unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 | 
				
			||||||
			int tag, unsigned int nr_pages, struct page **pages);
 | 
								int tag, unsigned int nr_pages, struct page **pages);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Returns locked page at given index in given cache, creating it if needed.
 | 
					 * Returns locked page at given index in given cache, creating it if needed.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										250
									
								
								mm/filemap.c
									
									
									
									
									
								
							
							
						
						
									
										250
									
								
								mm/filemap.c
									
									
									
									
									
								
							| 
						 | 
					@ -1742,11 +1742,20 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes)
 | 
				
			||||||
	i->count -= bytes;
 | 
						i->count -= bytes;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int iov_iter_fault_in_readable(struct iov_iter *i)
 | 
					/*
 | 
				
			||||||
 | 
					 * Fault in the first iovec of the given iov_iter, to a maximum length
 | 
				
			||||||
 | 
					 * of bytes. Returns 0 on success, or non-zero if the memory could not be
 | 
				
			||||||
 | 
					 * accessed (ie. because it is an invalid address).
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * writev-intensive code may want this to prefault several iovecs -- that
 | 
				
			||||||
 | 
					 * would be possible (callers must not rely on the fact that _only_ the
 | 
				
			||||||
 | 
					 * first iovec will be faulted with the current implementation).
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	size_t seglen = min(i->iov->iov_len - i->iov_offset, i->count);
 | 
					 | 
				
			||||||
	char __user *buf = i->iov->iov_base + i->iov_offset;
 | 
						char __user *buf = i->iov->iov_base + i->iov_offset;
 | 
				
			||||||
	return fault_in_pages_readable(buf, seglen);
 | 
						bytes = min(bytes, i->iov->iov_len - i->iov_offset);
 | 
				
			||||||
 | 
						return fault_in_pages_readable(buf, bytes);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					@ -1843,6 +1852,95 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL(generic_write_checks);
 | 
					EXPORT_SYMBOL(generic_write_checks);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int pagecache_write_begin(struct file *file, struct address_space *mapping,
 | 
				
			||||||
 | 
									loff_t pos, unsigned len, unsigned flags,
 | 
				
			||||||
 | 
									struct page **pagep, void **fsdata)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						const struct address_space_operations *aops = mapping->a_ops;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (aops->write_begin) {
 | 
				
			||||||
 | 
							return aops->write_begin(file, mapping, pos, len, flags,
 | 
				
			||||||
 | 
												pagep, fsdata);
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							int ret;
 | 
				
			||||||
 | 
							pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 | 
				
			||||||
 | 
							unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
 | 
				
			||||||
 | 
							struct inode *inode = mapping->host;
 | 
				
			||||||
 | 
							struct page *page;
 | 
				
			||||||
 | 
					again:
 | 
				
			||||||
 | 
							page = __grab_cache_page(mapping, index);
 | 
				
			||||||
 | 
							*pagep = page;
 | 
				
			||||||
 | 
							if (!page)
 | 
				
			||||||
 | 
								return -ENOMEM;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) {
 | 
				
			||||||
 | 
								/*
 | 
				
			||||||
 | 
								 * There is no way to resolve a short write situation
 | 
				
			||||||
 | 
								 * for a !Uptodate page (except by double copying in
 | 
				
			||||||
 | 
								 * the caller done by generic_perform_write_2copy).
 | 
				
			||||||
 | 
								 *
 | 
				
			||||||
 | 
								 * Instead, we have to bring it uptodate here.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								ret = aops->readpage(file, page);
 | 
				
			||||||
 | 
								page_cache_release(page);
 | 
				
			||||||
 | 
								if (ret) {
 | 
				
			||||||
 | 
									if (ret == AOP_TRUNCATED_PAGE)
 | 
				
			||||||
 | 
										goto again;
 | 
				
			||||||
 | 
									return ret;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								goto again;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							ret = aops->prepare_write(file, page, offset, offset+len);
 | 
				
			||||||
 | 
							if (ret) {
 | 
				
			||||||
 | 
								if (ret != AOP_TRUNCATED_PAGE)
 | 
				
			||||||
 | 
									unlock_page(page);
 | 
				
			||||||
 | 
								page_cache_release(page);
 | 
				
			||||||
 | 
								if (pos + len > inode->i_size)
 | 
				
			||||||
 | 
									vmtruncate(inode, inode->i_size);
 | 
				
			||||||
 | 
								if (ret == AOP_TRUNCATED_PAGE)
 | 
				
			||||||
 | 
									goto again;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							return ret;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(pagecache_write_begin);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int pagecache_write_end(struct file *file, struct address_space *mapping,
 | 
				
			||||||
 | 
									loff_t pos, unsigned len, unsigned copied,
 | 
				
			||||||
 | 
									struct page *page, void *fsdata)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						const struct address_space_operations *aops = mapping->a_ops;
 | 
				
			||||||
 | 
						int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (aops->write_end) {
 | 
				
			||||||
 | 
							mark_page_accessed(page);
 | 
				
			||||||
 | 
							ret = aops->write_end(file, mapping, pos, len, copied,
 | 
				
			||||||
 | 
												page, fsdata);
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
 | 
				
			||||||
 | 
							struct inode *inode = mapping->host;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							flush_dcache_page(page);
 | 
				
			||||||
 | 
							ret = aops->commit_write(file, page, offset, offset+len);
 | 
				
			||||||
 | 
							unlock_page(page);
 | 
				
			||||||
 | 
							mark_page_accessed(page);
 | 
				
			||||||
 | 
							page_cache_release(page);
 | 
				
			||||||
 | 
							BUG_ON(ret == AOP_TRUNCATED_PAGE); /* can't deal with */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (ret < 0) {
 | 
				
			||||||
 | 
								if (pos + len > inode->i_size)
 | 
				
			||||||
 | 
									vmtruncate(inode, inode->i_size);
 | 
				
			||||||
 | 
							} else if (ret > 0)
 | 
				
			||||||
 | 
								ret = min_t(size_t, copied, ret);
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
 | 
								ret = copied;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(pagecache_write_end);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ssize_t
 | 
					ssize_t
 | 
				
			||||||
generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 | 
					generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 | 
				
			||||||
		unsigned long *nr_segs, loff_t pos, loff_t *ppos,
 | 
							unsigned long *nr_segs, loff_t pos, loff_t *ppos,
 | 
				
			||||||
| 
						 | 
					@ -1886,8 +1984,7 @@ EXPORT_SYMBOL(generic_file_direct_write);
 | 
				
			||||||
 * Find or create a page at the given pagecache position. Return the locked
 | 
					 * Find or create a page at the given pagecache position. Return the locked
 | 
				
			||||||
 * page. This function is specifically for buffered writes.
 | 
					 * page. This function is specifically for buffered writes.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static struct page *__grab_cache_page(struct address_space *mapping,
 | 
					struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index)
 | 
				
			||||||
							pgoff_t index)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int status;
 | 
						int status;
 | 
				
			||||||
	struct page *page;
 | 
						struct page *page;
 | 
				
			||||||
| 
						 | 
					@ -1908,20 +2005,16 @@ static struct page *__grab_cache_page(struct address_space *mapping,
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return page;
 | 
						return page;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(__grab_cache_page);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ssize_t
 | 
					static ssize_t generic_perform_write_2copy(struct file *file,
 | 
				
			||||||
generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 | 
									struct iov_iter *i, loff_t pos)
 | 
				
			||||||
		unsigned long nr_segs, loff_t pos, loff_t *ppos,
 | 
					 | 
				
			||||||
		size_t count, ssize_t written)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct file *file = iocb->ki_filp;
 | 
					 | 
				
			||||||
	struct address_space *mapping = file->f_mapping;
 | 
						struct address_space *mapping = file->f_mapping;
 | 
				
			||||||
	const struct address_space_operations *a_ops = mapping->a_ops;
 | 
						const struct address_space_operations *a_ops = mapping->a_ops;
 | 
				
			||||||
	struct inode 	*inode = mapping->host;
 | 
						struct inode *inode = mapping->host;
 | 
				
			||||||
	long		status = 0;
 | 
						long status = 0;
 | 
				
			||||||
	struct iov_iter i;
 | 
						ssize_t written = 0;
 | 
				
			||||||
 | 
					 | 
				
			||||||
	iov_iter_init(&i, iov, nr_segs, count, written);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	do {
 | 
						do {
 | 
				
			||||||
		struct page *src_page;
 | 
							struct page *src_page;
 | 
				
			||||||
| 
						 | 
					@ -1934,7 +2027,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 | 
				
			||||||
		offset = (pos & (PAGE_CACHE_SIZE - 1));
 | 
							offset = (pos & (PAGE_CACHE_SIZE - 1));
 | 
				
			||||||
		index = pos >> PAGE_CACHE_SHIFT;
 | 
							index = pos >> PAGE_CACHE_SHIFT;
 | 
				
			||||||
		bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
 | 
							bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
 | 
				
			||||||
						iov_iter_count(&i));
 | 
											iov_iter_count(i));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * a non-NULL src_page indicates that we're doing the
 | 
							 * a non-NULL src_page indicates that we're doing the
 | 
				
			||||||
| 
						 | 
					@ -1952,7 +2045,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 | 
				
			||||||
		 * to check that the address is actually valid, when atomic
 | 
							 * to check that the address is actually valid, when atomic
 | 
				
			||||||
		 * usercopies are used, below.
 | 
							 * usercopies are used, below.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		if (unlikely(iov_iter_fault_in_readable(&i))) {
 | 
							if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
 | 
				
			||||||
			status = -EFAULT;
 | 
								status = -EFAULT;
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					@ -1983,7 +2076,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 | 
				
			||||||
			 * same reason as we can't take a page fault with a
 | 
								 * same reason as we can't take a page fault with a
 | 
				
			||||||
			 * page locked (as explained below).
 | 
								 * page locked (as explained below).
 | 
				
			||||||
			 */
 | 
								 */
 | 
				
			||||||
			copied = iov_iter_copy_from_user(src_page, &i,
 | 
								copied = iov_iter_copy_from_user(src_page, i,
 | 
				
			||||||
								offset, bytes);
 | 
													offset, bytes);
 | 
				
			||||||
			if (unlikely(copied == 0)) {
 | 
								if (unlikely(copied == 0)) {
 | 
				
			||||||
				status = -EFAULT;
 | 
									status = -EFAULT;
 | 
				
			||||||
| 
						 | 
					@ -2008,7 +2101,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 | 
				
			||||||
				page_cache_release(src_page);
 | 
									page_cache_release(src_page);
 | 
				
			||||||
				continue;
 | 
									continue;
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		status = a_ops->prepare_write(file, page, offset, offset+bytes);
 | 
							status = a_ops->prepare_write(file, page, offset, offset+bytes);
 | 
				
			||||||
| 
						 | 
					@ -2030,7 +2122,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 | 
				
			||||||
			 * really matter.
 | 
								 * really matter.
 | 
				
			||||||
			 */
 | 
								 */
 | 
				
			||||||
			pagefault_disable();
 | 
								pagefault_disable();
 | 
				
			||||||
			copied = iov_iter_copy_from_user_atomic(page, &i,
 | 
								copied = iov_iter_copy_from_user_atomic(page, i,
 | 
				
			||||||
								offset, bytes);
 | 
													offset, bytes);
 | 
				
			||||||
			pagefault_enable();
 | 
								pagefault_enable();
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
| 
						 | 
					@ -2056,9 +2148,9 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 | 
				
			||||||
		if (src_page)
 | 
							if (src_page)
 | 
				
			||||||
			page_cache_release(src_page);
 | 
								page_cache_release(src_page);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		iov_iter_advance(&i, copied);
 | 
							iov_iter_advance(i, copied);
 | 
				
			||||||
		written += copied;
 | 
					 | 
				
			||||||
		pos += copied;
 | 
							pos += copied;
 | 
				
			||||||
 | 
							written += copied;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		balance_dirty_pages_ratelimited(mapping);
 | 
							balance_dirty_pages_ratelimited(mapping);
 | 
				
			||||||
		cond_resched();
 | 
							cond_resched();
 | 
				
			||||||
| 
						 | 
					@ -2082,13 +2174,117 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
		else
 | 
							else
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
	} while (iov_iter_count(&i));
 | 
						} while (iov_iter_count(i));
 | 
				
			||||||
	*ppos = pos;
 | 
					
 | 
				
			||||||
 | 
						return written ? written : status;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * generic_perform_write - buffered write through write_begin/write_end
					 * @file:	file being written; file->f_mapping supplies the address_space
					 * @i:		iterator over the source data, advanced by each chunk written
					 * @pos:	file position of the first byte to write
					 *
					 * Writes the data described by @i into the pagecache, at most one
					 * pagecache page per iteration.  Each iteration faults in the source,
					 * asks the filesystem for a destination page via ->write_begin(), copies
					 * into it with page faults disabled, and hands the result back via
					 * ->write_end().
					 *
					 * Returns the number of bytes written if that is non-zero, otherwise the
					 * status that ended the loop (-EFAULT from faulting in the source, or
					 * whatever ->write_begin()/->write_end() reported).
					 */
					static ssize_t generic_perform_write(struct file *file,
									struct iov_iter *i, loff_t pos)
					{
						struct address_space *mapping = file->f_mapping;
						const struct address_space_operations *a_ops = mapping->a_ops;
						long status = 0;
						ssize_t written = 0;

						do {
							struct page *page;
							unsigned long offset;	/* Offset into pagecache page */
							unsigned long bytes;	/* Bytes to write to page */
							size_t copied;		/* Bytes copied from user */
							void *fsdata;

							/* Never cross a pagecache page boundary in one chunk. */
							offset = (pos & (PAGE_CACHE_SIZE - 1));
							bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
											iov_iter_count(i));

					again:

							/*
							 * Bring in the user page that we will copy from _first_.
							 * Otherwise there's a nasty deadlock on copying from the
							 * same page as we're writing to, without it being marked
							 * up-to-date.
							 *
							 * Not only is this an optimisation, but it is also required
							 * to check that the address is actually valid, when atomic
							 * usercopies are used, below.
							 */
							if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
								status = -EFAULT;
								break;
							}

							/* flags == 0; the filesystem returns the page and its cookie. */
							status = a_ops->write_begin(file, mapping, pos, bytes, 0,
											&page, &fsdata);
							if (unlikely(status))
								break;

							/*
							 * The copy must not take a page fault while we hold the page
							 * from ->write_begin(), so it is done atomically and may
							 * come up short (possibly copying nothing at all).
							 */
							pagefault_disable();
							copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
							pagefault_enable();
							flush_dcache_page(page);

							/*
							 * ->write_end() is always called after a successful
							 * ->write_begin(), even for a short copy.  Its non-negative
							 * return value replaces 'copied' as the amount of progress.
							 */
							status = a_ops->write_end(file, mapping, pos, bytes, copied,
											page, fsdata);
							if (unlikely(status < 0))
								break;
							copied = status;

							cond_resched();

							if (unlikely(copied == 0)) {
								/*
								 * If we were unable to copy any data at all, we must
								 * fall back to a single segment length write.
								 *
								 * If we didn't fallback here, we could livelock
								 * because not all segments in the iov can be copied at
								 * once without a pagefault.
								 *
								 * NOTE(review): if the current segment has zero length,
								 * 'bytes' becomes 0 here and the retry presumably cannot
								 * make progress either -- confirm that the iterator
								 * helpers skip empty segments.
								 */
								bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
											iov_iter_single_seg_count(i));
								goto again;
							}
							iov_iter_advance(i, copied);
							pos += copied;
							written += copied;

							balance_dirty_pages_ratelimited(mapping);

						} while (iov_iter_count(i));

						return written ? written : status;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ssize_t
 | 
				
			||||||
 | 
					generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 | 
				
			||||||
 | 
							unsigned long nr_segs, loff_t pos, loff_t *ppos,
 | 
				
			||||||
 | 
							size_t count, ssize_t written)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct file *file = iocb->ki_filp;
 | 
				
			||||||
 | 
						struct address_space *mapping = file->f_mapping;
 | 
				
			||||||
 | 
						const struct address_space_operations *a_ops = mapping->a_ops;
 | 
				
			||||||
 | 
						struct inode *inode = mapping->host;
 | 
				
			||||||
 | 
						ssize_t status;
 | 
				
			||||||
 | 
						struct iov_iter i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						iov_iter_init(&i, iov, nr_segs, count, written);
 | 
				
			||||||
 | 
						if (a_ops->write_begin)
 | 
				
			||||||
 | 
							status = generic_perform_write(file, &i, pos);
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							status = generic_perform_write_2copy(file, &i, pos);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	if (likely(status >= 0)) {
 | 
						if (likely(status >= 0)) {
 | 
				
			||||||
 | 
							written += status;
 | 
				
			||||||
 | 
							*ppos = pos + status;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * For now, when the user asks for O_SYNC, we'll actually give
 | 
				
			||||||
 | 
							 * O_DSYNC
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
		if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
 | 
							if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
 | 
				
			||||||
			if (!a_ops->writepage || !is_sync_kiocb(iocb))
 | 
								if (!a_ops->writepage || !is_sync_kiocb(iocb))
 | 
				
			||||||
				status = generic_osync_inode(inode, mapping,
 | 
									status = generic_osync_inode(inode, mapping,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue