mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-01 00:58:39 +02:00 
			
		
		
		
	ext4: Add delayed allocation support in data=writeback mode
Updated with fixes from Mingming Cao <cmm@us.ibm.com> to unlock and release the page from page cache if the delalloc write_begin failed, and properly handle preallocated blocks. Also added a fix to clear buffer_delay in block_write_full_page() after allocating a delayed buffer. Updated with fixes from Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> to update i_disksize properly and to add bmap support for delayed allocation. Updated with a fix from Valerie Clement <valerie.clement@bull.net> to avoid filesystem corruption when the filesystem is mounted with the delalloc option and blocksize < pagesize. Signed-off-by: Alex Tomas <alex@clusterfs.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
This commit is contained in:
		
							parent
							
								
									29a814d2ee
								
							
						
					
					
						commit
						64769240bd
					
				
					 3 changed files with 700 additions and 6 deletions
				
			
		|  | @ -536,6 +536,7 @@ do {									       \ | |||
| #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */ | ||||
| #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */ | ||||
| #define EXT4_MOUNT_MBALLOC		0x4000000 /* Buddy allocation support */ | ||||
| #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */ | ||||
| /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ | ||||
| #ifndef _LINUX_EXT2_FS_H | ||||
| #define clear_opt(o, opt)		o &= ~EXT4_MOUNT_##opt | ||||
|  |  | |||
							
								
								
									
										699
									
								
								fs/ext4/inode.c
									
									
									
									
									
								
							
							
						
						
									
										699
									
								
								fs/ext4/inode.c
									
									
									
									
									
								
							|  | @ -32,6 +32,7 @@ | |||
| #include <linux/string.h> | ||||
| #include <linux/buffer_head.h> | ||||
| #include <linux/writeback.h> | ||||
| #include <linux/pagevec.h> | ||||
| #include <linux/mpage.h> | ||||
| #include <linux/uio.h> | ||||
| #include <linux/bio.h> | ||||
|  | @ -46,6 +47,8 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, | |||
| 						   new_size); | ||||
| } | ||||
| 
 | ||||
| static void ext4_invalidatepage(struct page *page, unsigned long offset); | ||||
| 
 | ||||
| /*
 | ||||
|  * Test whether an inode is a fast symlink. | ||||
|  */ | ||||
|  | @ -1407,6 +1410,669 @@ static int ext4_journalled_write_end(struct file *file, | |||
| 	return ret ? ret : copied; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Delayed allocation stuff | ||||
|  */ | ||||
| 
 | ||||
| struct mpage_da_data { | ||||
| 	struct inode *inode; | ||||
| 	struct buffer_head lbh;			/* extent of blocks */ | ||||
| 	unsigned long first_page, next_page;	/* extent of pages */ | ||||
| 	get_block_t *get_block; | ||||
| 	struct writeback_control *wbc; | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * mpage_da_submit_io - walks through extent of pages and try to write | ||||
|  * them with __mpage_writepage() | ||||
|  * | ||||
|  * @mpd->inode: inode | ||||
|  * @mpd->first_page: first page of the extent | ||||
|  * @mpd->next_page: page after the last page of the extent | ||||
|  * @mpd->get_block: the filesystem's block mapper function | ||||
|  * | ||||
|  * By the time mpage_da_submit_io() is called we expect all blocks | ||||
|  * to be allocated. this may be wrong if allocation failed. | ||||
|  * | ||||
|  * As pages are already locked by write_cache_pages(), we can't use it | ||||
|  */ | ||||
| static int mpage_da_submit_io(struct mpage_da_data *mpd) | ||||
| { | ||||
| 	struct address_space *mapping = mpd->inode->i_mapping; | ||||
| 	struct mpage_data mpd_pp = { | ||||
| 		.bio = NULL, | ||||
| 		.last_block_in_bio = 0, | ||||
| 		.get_block = mpd->get_block, | ||||
| 		.use_writepage = 1, | ||||
| 	}; | ||||
| 	int ret = 0, err, nr_pages, i; | ||||
| 	unsigned long index, end; | ||||
| 	struct pagevec pvec; | ||||
| 
 | ||||
| 	BUG_ON(mpd->next_page <= mpd->first_page); | ||||
| 
 | ||||
| 	pagevec_init(&pvec, 0); | ||||
| 	index = mpd->first_page; | ||||
| 	end = mpd->next_page - 1; | ||||
| 
 | ||||
| 	while (index <= end) { | ||||
| 		/* XXX: optimize tail */ | ||||
| 		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||||
| 		if (nr_pages == 0) | ||||
| 			break; | ||||
| 		for (i = 0; i < nr_pages; i++) { | ||||
| 			struct page *page = pvec.pages[i]; | ||||
| 
 | ||||
| 			index = page->index; | ||||
| 			if (index > end) | ||||
| 				break; | ||||
| 			index++; | ||||
| 
 | ||||
| 			err = __mpage_writepage(page, mpd->wbc, &mpd_pp); | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * In error case, we have to continue because | ||||
| 			 * remaining pages are still locked | ||||
| 			 * XXX: unlock and re-dirty them? | ||||
| 			 */ | ||||
| 			if (ret == 0) | ||||
| 				ret = err; | ||||
| 		} | ||||
| 		pagevec_release(&pvec); | ||||
| 	} | ||||
| 	if (mpd_pp.bio) | ||||
| 		mpage_bio_submit(WRITE, mpd_pp.bio); | ||||
| 
 | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers | ||||
|  * | ||||
|  * @mpd->inode - inode to walk through | ||||
|  * @exbh->b_blocknr - first block on a disk | ||||
|  * @exbh->b_size - amount of space in bytes | ||||
|  * @logical - first logical block to start assignment with | ||||
|  * | ||||
|  * the function goes through all passed space and put actual disk | ||||
|  * block numbers into buffer heads, dropping BH_Delay | ||||
|  */ | ||||
| static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | ||||
| 				 struct buffer_head *exbh) | ||||
| { | ||||
| 	struct inode *inode = mpd->inode; | ||||
| 	struct address_space *mapping = inode->i_mapping; | ||||
| 	int blocks = exbh->b_size >> inode->i_blkbits; | ||||
| 	sector_t pblock = exbh->b_blocknr, cur_logical; | ||||
| 	struct buffer_head *head, *bh; | ||||
| 	unsigned long index, end; | ||||
| 	struct pagevec pvec; | ||||
| 	int nr_pages, i; | ||||
| 
 | ||||
| 	index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||||
| 	end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||||
| 	cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||||
| 
 | ||||
| 	pagevec_init(&pvec, 0); | ||||
| 
 | ||||
| 	while (index <= end) { | ||||
| 		/* XXX: optimize tail */ | ||||
| 		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||||
| 		if (nr_pages == 0) | ||||
| 			break; | ||||
| 		for (i = 0; i < nr_pages; i++) { | ||||
| 			struct page *page = pvec.pages[i]; | ||||
| 
 | ||||
| 			index = page->index; | ||||
| 			if (index > end) | ||||
| 				break; | ||||
| 			index++; | ||||
| 
 | ||||
| 			BUG_ON(!PageLocked(page)); | ||||
| 			BUG_ON(PageWriteback(page)); | ||||
| 			BUG_ON(!page_has_buffers(page)); | ||||
| 
 | ||||
| 			bh = page_buffers(page); | ||||
| 			head = bh; | ||||
| 
 | ||||
| 			/* skip blocks out of the range */ | ||||
| 			do { | ||||
| 				if (cur_logical >= logical) | ||||
| 					break; | ||||
| 				cur_logical++; | ||||
| 			} while ((bh = bh->b_this_page) != head); | ||||
| 
 | ||||
| 			do { | ||||
| 				if (cur_logical >= logical + blocks) | ||||
| 					break; | ||||
| 
 | ||||
| 				if (buffer_delay(bh)) { | ||||
| 					bh->b_blocknr = pblock; | ||||
| 					clear_buffer_delay(bh); | ||||
| 				} else if (buffer_mapped(bh)) { | ||||
| 					BUG_ON(bh->b_blocknr != pblock); | ||||
| 				} | ||||
| 
 | ||||
| 				cur_logical++; | ||||
| 				pblock++; | ||||
| 			} while ((bh = bh->b_this_page) != head); | ||||
| 		} | ||||
| 		pagevec_release(&pvec); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /*
 | ||||
|  * __unmap_underlying_blocks - just a helper function to unmap | ||||
|  * set of blocks described by @bh | ||||
|  */ | ||||
| static inline void __unmap_underlying_blocks(struct inode *inode, | ||||
| 					     struct buffer_head *bh) | ||||
| { | ||||
| 	struct block_device *bdev = inode->i_sb->s_bdev; | ||||
| 	int blocks, i; | ||||
| 
 | ||||
| 	blocks = bh->b_size >> inode->i_blkbits; | ||||
| 	for (i = 0; i < blocks; i++) | ||||
| 		unmap_underlying_metadata(bdev, bh->b_blocknr + i); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * mpage_da_map_blocks - go through given space | ||||
|  * | ||||
|  * @mpd->lbh - bh describing space | ||||
|  * @mpd->get_block - the filesystem's block mapper function | ||||
|  * | ||||
|  * The function skips space we know is already mapped to disk blocks. | ||||
|  * | ||||
|  * The function ignores errors ->get_block() returns, thus real | ||||
|  * error handling is postponed to __mpage_writepage() | ||||
|  */ | ||||
| static void mpage_da_map_blocks(struct mpage_da_data *mpd) | ||||
| { | ||||
| 	struct buffer_head *lbh = &mpd->lbh; | ||||
| 	int err = 0, remain = lbh->b_size; | ||||
| 	sector_t next = lbh->b_blocknr; | ||||
| 	struct buffer_head new; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We consider only non-mapped and non-allocated blocks | ||||
| 	 */ | ||||
| 	if (buffer_mapped(lbh) && !buffer_delay(lbh)) | ||||
| 		return; | ||||
| 
 | ||||
| 	while (remain) { | ||||
| 		new.b_state = lbh->b_state; | ||||
| 		new.b_blocknr = 0; | ||||
| 		new.b_size = remain; | ||||
| 		err = mpd->get_block(mpd->inode, next, &new, 1); | ||||
| 		if (err) { | ||||
| 			/*
 | ||||
| 			 * Rather than implement own error handling | ||||
| 			 * here, we just leave remaining blocks | ||||
| 			 * unallocated and try again with ->writepage() | ||||
| 			 */ | ||||
| 			break; | ||||
| 		} | ||||
| 		BUG_ON(new.b_size == 0); | ||||
| 
 | ||||
| 		if (buffer_new(&new)) | ||||
| 			__unmap_underlying_blocks(mpd->inode, &new); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * If blocks are delayed marked, we need to | ||||
| 		 * put actual blocknr and drop delayed bit | ||||
| 		 */ | ||||
| 		if (buffer_delay(lbh)) | ||||
| 			mpage_put_bnr_to_bhs(mpd, next, &new); | ||||
| 
 | ||||
| 			/* go for the remaining blocks */ | ||||
| 			next += new.b_size >> mpd->inode->i_blkbits; | ||||
| 			remain -= new.b_size; | ||||
| 		} | ||||
| } | ||||
| 
 | ||||
| #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay)) | ||||
| 
 | ||||
| /*
 | ||||
|  * mpage_add_bh_to_extent - try to add one more block to extent of blocks | ||||
|  * | ||||
|  * @mpd->lbh - extent of blocks | ||||
|  * @logical - logical number of the block in the file | ||||
|  * @bh - bh of the block (used to access block's state) | ||||
|  * | ||||
|  * the function is used to collect contig. blocks in same state | ||||
|  */ | ||||
| static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | ||||
| 				   sector_t logical, struct buffer_head *bh) | ||||
| { | ||||
| 	struct buffer_head *lbh = &mpd->lbh; | ||||
| 	sector_t next; | ||||
| 
 | ||||
| 	next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * First block in the extent | ||||
| 	 */ | ||||
| 	if (lbh->b_size == 0) { | ||||
| 		lbh->b_blocknr = logical; | ||||
| 		lbh->b_size = bh->b_size; | ||||
| 		lbh->b_state = bh->b_state & BH_FLAGS; | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Can we merge the block to our big extent? | ||||
| 	 */ | ||||
| 	if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { | ||||
| 		lbh->b_size += bh->b_size; | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We couldn't merge the block to our extent, so we | ||||
| 	 * need to flush current  extent and start new one | ||||
| 	 */ | ||||
| 	mpage_da_map_blocks(mpd); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Now start a new extent | ||||
| 	 */ | ||||
| 	lbh->b_size = bh->b_size; | ||||
| 	lbh->b_state = bh->b_state & BH_FLAGS; | ||||
| 	lbh->b_blocknr = logical; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * __mpage_da_writepage - finds extent of pages and blocks | ||||
|  * | ||||
|  * @page: page to consider | ||||
|  * @wbc: not used, we just follow rules | ||||
|  * @data: context | ||||
|  * | ||||
|  * The function finds extents of pages and scan them for all blocks. | ||||
|  */ | ||||
| static int __mpage_da_writepage(struct page *page, | ||||
| 				struct writeback_control *wbc, void *data) | ||||
| { | ||||
| 	struct mpage_da_data *mpd = data; | ||||
| 	struct inode *inode = mpd->inode; | ||||
| 	struct buffer_head *bh, *head, fake; | ||||
| 	sector_t logical; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Can we merge this page to current extent? | ||||
| 	 */ | ||||
| 	if (mpd->next_page != page->index) { | ||||
| 		/*
 | ||||
| 		 * Nope, we can't. So, we map non-allocated blocks | ||||
| 		 * and start IO on them using __mpage_writepage() | ||||
| 		 */ | ||||
| 		if (mpd->next_page != mpd->first_page) { | ||||
| 			mpage_da_map_blocks(mpd); | ||||
| 			mpage_da_submit_io(mpd); | ||||
| 		} | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Start next extent of pages ... | ||||
| 		 */ | ||||
| 		mpd->first_page = page->index; | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * ... and blocks | ||||
| 		 */ | ||||
| 		mpd->lbh.b_size = 0; | ||||
| 		mpd->lbh.b_state = 0; | ||||
| 		mpd->lbh.b_blocknr = 0; | ||||
| 	} | ||||
| 
 | ||||
| 	mpd->next_page = page->index + 1; | ||||
| 	logical = (sector_t) page->index << | ||||
| 		  (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||||
| 
 | ||||
| 	if (!page_has_buffers(page)) { | ||||
| 		/*
 | ||||
| 		 * There is no attached buffer heads yet (mmap?) | ||||
| 		 * we treat the page asfull of dirty blocks | ||||
| 		 */ | ||||
| 		bh = &fake; | ||||
| 		bh->b_size = PAGE_CACHE_SIZE; | ||||
| 		bh->b_state = 0; | ||||
| 		set_buffer_dirty(bh); | ||||
| 		set_buffer_uptodate(bh); | ||||
| 		mpage_add_bh_to_extent(mpd, logical, bh); | ||||
| 	} else { | ||||
| 		/*
 | ||||
| 		 * Page with regular buffer heads, just add all dirty ones | ||||
| 		 */ | ||||
| 		head = page_buffers(page); | ||||
| 		bh = head; | ||||
| 		do { | ||||
| 			BUG_ON(buffer_locked(bh)); | ||||
| 			if (buffer_dirty(bh)) | ||||
| 				mpage_add_bh_to_extent(mpd, logical, bh); | ||||
| 			logical++; | ||||
| 		} while ((bh = bh->b_this_page) != head); | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * mpage_da_writepages - walk the list of dirty pages of the given | ||||
|  * address space, allocates non-allocated blocks, maps newly-allocated | ||||
|  * blocks to existing bhs and issue IO them | ||||
|  * | ||||
|  * @mapping: address space structure to write | ||||
|  * @wbc: subtract the number of written pages from *@wbc->nr_to_write | ||||
|  * @get_block: the filesystem's block mapper function. | ||||
|  * | ||||
|  * This is a library function, which implements the writepages() | ||||
|  * address_space_operation. | ||||
|  * | ||||
|  * In order to avoid duplication of logic that deals with partial pages, | ||||
|  * multiple bio per page, etc, we find non-allocated blocks, allocate | ||||
|  * them with minimal calls to ->get_block() and re-use __mpage_writepage() | ||||
|  * | ||||
|  * It's important that we call __mpage_writepage() only once for each | ||||
|  * involved page, otherwise we'd have to implement more complicated logic | ||||
|  * to deal with pages w/o PG_lock or w/ PG_writeback and so on. | ||||
|  * | ||||
|  * See comments to mpage_writepages() | ||||
|  */ | ||||
| static int mpage_da_writepages(struct address_space *mapping, | ||||
| 			       struct writeback_control *wbc, | ||||
| 			       get_block_t get_block) | ||||
| { | ||||
| 	struct mpage_da_data mpd; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	if (!get_block) | ||||
| 		return generic_writepages(mapping, wbc); | ||||
| 
 | ||||
| 	mpd.wbc = wbc; | ||||
| 	mpd.inode = mapping->host; | ||||
| 	mpd.lbh.b_size = 0; | ||||
| 	mpd.lbh.b_state = 0; | ||||
| 	mpd.lbh.b_blocknr = 0; | ||||
| 	mpd.first_page = 0; | ||||
| 	mpd.next_page = 0; | ||||
| 	mpd.get_block = get_block; | ||||
| 
 | ||||
| 	ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Handle last extent of pages | ||||
| 	 */ | ||||
| 	if (mpd.next_page != mpd.first_page) { | ||||
| 		mpage_da_map_blocks(&mpd); | ||||
| 		mpage_da_submit_io(&mpd); | ||||
| 	} | ||||
| 
 | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * this is a special callback for ->write_begin() only | ||||
|  * it's intention is to return mapped block or reserve space | ||||
|  */ | ||||
| static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | ||||
| 				  struct buffer_head *bh_result, int create) | ||||
| { | ||||
| 	int ret = 0; | ||||
| 
 | ||||
| 	BUG_ON(create == 0); | ||||
| 	BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * first, we need to know whether the block is allocated already | ||||
| 	 * preallocated blocks are unmapped but should treated | ||||
| 	 * the same as allocated blocks. | ||||
| 	 */ | ||||
| 	ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1,  bh_result, 0, 0); | ||||
| 	if (ret == 0) { | ||||
| 		/* the block isn't allocated yet, let's reserve space */ | ||||
| 		/* XXX: call reservation here */ | ||||
| 		/*
 | ||||
| 		 * XXX: __block_prepare_write() unmaps passed block, | ||||
| 		 * is it OK? | ||||
| 		 */ | ||||
| 		map_bh(bh_result, inode->i_sb, 0); | ||||
| 		set_buffer_new(bh_result); | ||||
| 		set_buffer_delay(bh_result); | ||||
| 	} else if (ret > 0) { | ||||
| 		bh_result->b_size = (ret << inode->i_blkbits); | ||||
| 		ret = 0; | ||||
| 	} | ||||
| 
 | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | ||||
| 				   struct buffer_head *bh_result, int create) | ||||
| { | ||||
| 	int ret, needed_blocks = ext4_writepage_trans_blocks(inode); | ||||
| 	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||||
| 	loff_t disksize = EXT4_I(inode)->i_disksize; | ||||
| 	handle_t *handle = NULL; | ||||
| 
 | ||||
| 	if (create) { | ||||
| 		handle = ext4_journal_start(inode, needed_blocks); | ||||
| 		if (IS_ERR(handle)) { | ||||
| 			ret = PTR_ERR(handle); | ||||
| 			goto out; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | ||||
| 				   bh_result, create, 0); | ||||
| 	if (ret > 0) { | ||||
| 		bh_result->b_size = (ret << inode->i_blkbits); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Update on-disk size along with block allocation | ||||
| 		 * we don't use 'extend_disksize' as size may change | ||||
| 		 * within already allocated block -bzzz | ||||
| 		 */ | ||||
| 		disksize = ((loff_t) iblock + ret) << inode->i_blkbits; | ||||
| 		if (disksize > i_size_read(inode)) | ||||
| 			disksize = i_size_read(inode); | ||||
| 		if (disksize > EXT4_I(inode)->i_disksize) { | ||||
| 			/*
 | ||||
| 			 * XXX: replace with spinlock if seen contended -bzzz | ||||
| 			 */ | ||||
| 			down_write(&EXT4_I(inode)->i_data_sem); | ||||
| 			if (disksize > EXT4_I(inode)->i_disksize) | ||||
| 				EXT4_I(inode)->i_disksize = disksize; | ||||
| 			up_write(&EXT4_I(inode)->i_data_sem); | ||||
| 
 | ||||
| 			if (EXT4_I(inode)->i_disksize == disksize) { | ||||
| 				if (handle == NULL) | ||||
| 					handle = ext4_journal_start(inode, 1); | ||||
| 				if (!IS_ERR(handle)) | ||||
| 					ext4_mark_inode_dirty(handle, inode); | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		ret = 0; | ||||
| 	} | ||||
| 
 | ||||
| out: | ||||
| 	if (handle && !IS_ERR(handle)) | ||||
| 		ext4_journal_stop(handle); | ||||
| 
 | ||||
| 	return ret; | ||||
| } | ||||
| /* FIXME!! only support data=writeback mode */ | ||||
| static int ext4_da_writepage(struct page *page, | ||||
| 				struct writeback_control *wbc) | ||||
| { | ||||
| 	struct inode *inode = page->mapping->host; | ||||
| 	handle_t *handle = NULL; | ||||
| 	int ret = 0; | ||||
| 	int err; | ||||
| 
 | ||||
| 	if (ext4_journal_current_handle()) | ||||
| 		goto out_fail; | ||||
| 
 | ||||
| 	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | ||||
| 	if (IS_ERR(handle)) { | ||||
| 		ret = PTR_ERR(handle); | ||||
| 		goto out_fail; | ||||
| 	} | ||||
| 
 | ||||
| 	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | ||||
| 		ret = nobh_writepage(page, ext4_get_block, wbc); | ||||
| 	else | ||||
| 		ret = block_write_full_page(page, ext4_get_block, wbc); | ||||
| 
 | ||||
| 	if (!ret && inode->i_size > EXT4_I(inode)->i_disksize) { | ||||
| 		EXT4_I(inode)->i_disksize = inode->i_size; | ||||
| 		ext4_mark_inode_dirty(handle, inode); | ||||
| 	} | ||||
| 
 | ||||
| 	err = ext4_journal_stop(handle); | ||||
| 	if (!ret) | ||||
| 		ret = err; | ||||
| 	return ret; | ||||
| 
 | ||||
| out_fail: | ||||
| 	redirty_page_for_writepage(wbc, page); | ||||
| 	unlock_page(page); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static int ext4_da_writepages(struct address_space *mapping, | ||||
| 				struct writeback_control *wbc) | ||||
| { | ||||
| 	return mpage_da_writepages(mapping, wbc, ext4_da_get_block_write); | ||||
| } | ||||
| 
 | ||||
| static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | ||||
| 				loff_t pos, unsigned len, unsigned flags, | ||||
| 				struct page **pagep, void **fsdata) | ||||
| { | ||||
| 	int ret; | ||||
| 	struct page *page; | ||||
| 	pgoff_t index; | ||||
| 	unsigned from, to; | ||||
| 	struct inode *inode = mapping->host; | ||||
| 	handle_t *handle; | ||||
| 
 | ||||
| 	index = pos >> PAGE_CACHE_SHIFT; | ||||
| 	from = pos & (PAGE_CACHE_SIZE - 1); | ||||
| 	to = from + len; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * With delayed allocation, we don't log the i_disksize update | ||||
| 	 * if there is delayed block allocation. But we still need | ||||
| 	 * to journalling the i_disksize update if writes to the end | ||||
| 	 * of file which has an already mapped buffer. | ||||
| 	 */ | ||||
| 	handle = ext4_journal_start(inode, 1); | ||||
| 	if (IS_ERR(handle)) { | ||||
| 		ret = PTR_ERR(handle); | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	page = __grab_cache_page(mapping, index); | ||||
| 	if (!page) | ||||
| 		return -ENOMEM; | ||||
| 	*pagep = page; | ||||
| 
 | ||||
| 	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | ||||
| 							ext4_da_get_block_prep); | ||||
| 	if (ret < 0) { | ||||
| 		unlock_page(page); | ||||
| 		ext4_journal_stop(handle); | ||||
| 		page_cache_release(page); | ||||
| 	} | ||||
| 
 | ||||
| out: | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) | ||||
| { | ||||
| 	return !buffer_mapped(bh) || buffer_delay(bh); | ||||
| } | ||||
| 
 | ||||
| static int ext4_da_write_end(struct file *file, | ||||
| 				struct address_space *mapping, | ||||
| 				loff_t pos, unsigned len, unsigned copied, | ||||
| 				struct page *page, void *fsdata) | ||||
| { | ||||
| 	struct inode *inode = mapping->host; | ||||
| 	int ret = 0, ret2; | ||||
| 	handle_t *handle = ext4_journal_current_handle(); | ||||
| 	loff_t new_i_size; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * generic_write_end() will run mark_inode_dirty() if i_size | ||||
| 	 * changes.  So let's piggyback the i_disksize mark_inode_dirty | ||||
| 	 * into that. | ||||
| 	 */ | ||||
| 
 | ||||
| 	new_i_size = pos + copied; | ||||
| 	if (new_i_size > EXT4_I(inode)->i_disksize) | ||||
| 		if (!walk_page_buffers(NULL, page_buffers(page), | ||||
| 				       0, len, NULL, ext4_bh_unmapped_or_delay)){ | ||||
| 			/*
 | ||||
| 			 * Updating i_disksize when extending file without | ||||
| 			 * needing block allocation | ||||
| 			 */ | ||||
| 			if (ext4_should_order_data(inode)) | ||||
| 				ret = ext4_jbd2_file_inode(handle, inode); | ||||
| 
 | ||||
| 			EXT4_I(inode)->i_disksize = new_i_size; | ||||
| 		} | ||||
| 	ret2 = generic_write_end(file, mapping, pos, len, copied, | ||||
| 							page, fsdata); | ||||
| 	copied = ret2; | ||||
| 	if (ret2 < 0) | ||||
| 		ret = ret2; | ||||
| 	ret2 = ext4_journal_stop(handle); | ||||
| 	if (!ret) | ||||
| 		ret = ret2; | ||||
| 
 | ||||
| 	return ret ? ret : copied; | ||||
| } | ||||
| 
 | ||||
| static void ext4_da_invalidatepage(struct page *page, unsigned long offset) | ||||
| { | ||||
| 	struct buffer_head *head, *bh; | ||||
| 	unsigned int curr_off = 0; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Drop reserved blocks | ||||
| 	 */ | ||||
| 	BUG_ON(!PageLocked(page)); | ||||
| 	if (!page_has_buffers(page)) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	head = page_buffers(page); | ||||
| 	bh = head; | ||||
| 	do { | ||||
| 		unsigned int next_off = curr_off + bh->b_size; | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * is this block fully invalidated? | ||||
| 		 */ | ||||
| 		if (offset <= curr_off && buffer_delay(bh)) { | ||||
| 			clear_buffer_delay(bh); | ||||
| 			/* XXX: add real stuff here */ | ||||
| 		} | ||||
| 		curr_off = next_off; | ||||
| 		bh = bh->b_this_page; | ||||
| 	} while (bh != head); | ||||
| 
 | ||||
| out: | ||||
| 	ext4_invalidatepage(page, offset); | ||||
| 
 | ||||
| 	return; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /*
 | ||||
|  * bmap() is special.  It gets used by applications such as lilo and by | ||||
|  * the swapper to find the on-disk block of a specific piece of data. | ||||
|  | @ -1427,6 +2093,16 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
| 	journal_t *journal; | ||||
| 	int err; | ||||
| 
 | ||||
| 	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && | ||||
| 			test_opt(inode->i_sb, DELALLOC)) { | ||||
| 		/*
 | ||||
| 		 * With delalloc we want to sync the file | ||||
| 		 * so that we can make sure we allocate | ||||
| 		 * blocks for file | ||||
| 		 */ | ||||
| 		filemap_write_and_wait(mapping); | ||||
| 	} | ||||
| 
 | ||||
| 	if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { | ||||
| 		/*
 | ||||
| 		 * This is a REALLY heavyweight approach, but the use of | ||||
|  | @ -1471,11 +2147,6 @@ static int bput_one(handle_t *handle, struct buffer_head *bh) | |||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) | ||||
| { | ||||
| 	return !buffer_mapped(bh) || buffer_delay(bh); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Note that we don't need to start a transaction unless we're journaling data | ||||
|  * because we should have holes filled from ext4_page_mkwrite(). We even don't | ||||
|  | @ -1832,10 +2503,28 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
| 	.releasepage	= ext4_releasepage, | ||||
| }; | ||||
| 
 | ||||
| static const struct address_space_operations ext4_da_aops = { | ||||
| 	.readpage	= ext4_readpage, | ||||
| 	.readpages	= ext4_readpages, | ||||
| 	.writepage	= ext4_da_writepage, | ||||
| 	.writepages	= ext4_da_writepages, | ||||
| 	.sync_page	= block_sync_page, | ||||
| 	.write_begin	= ext4_da_write_begin, | ||||
| 	.write_end	= ext4_da_write_end, | ||||
| 	.bmap		= ext4_bmap, | ||||
| 	.invalidatepage	= ext4_da_invalidatepage, | ||||
| 	.releasepage	= ext4_releasepage, | ||||
| 	.direct_IO	= ext4_direct_IO, | ||||
| 	.migratepage	= buffer_migrate_page, | ||||
| }; | ||||
| 
 | ||||
| void ext4_set_aops(struct inode *inode) | ||||
| { | ||||
| 	if (ext4_should_order_data(inode)) | ||||
| 		inode->i_mapping->a_ops = &ext4_ordered_aops; | ||||
| 	else if (ext4_should_writeback_data(inode) && | ||||
| 		 test_opt(inode->i_sb, DELALLOC)) | ||||
| 		inode->i_mapping->a_ops = &ext4_da_aops; | ||||
| 	else if (ext4_should_writeback_data(inode)) | ||||
| 		inode->i_mapping->a_ops = &ext4_writeback_aops; | ||||
| 	else | ||||
|  |  | |||
|  | @ -898,7 +898,7 @@ enum { | |||
| 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | ||||
| 	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, | ||||
| 	Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, | ||||
| 	Opt_mballoc, Opt_nomballoc, Opt_stripe, | ||||
| 	Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, | ||||
| }; | ||||
| 
 | ||||
| static match_table_t tokens = { | ||||
|  | @ -957,6 +957,7 @@ static match_table_t tokens = { | |||
| 	{Opt_nomballoc, "nomballoc"}, | ||||
| 	{Opt_stripe, "stripe=%u"}, | ||||
| 	{Opt_resize, "resize"}, | ||||
| 	{Opt_delalloc, "delalloc"}, | ||||
| 	{Opt_err, NULL}, | ||||
| }; | ||||
| 
 | ||||
|  | @ -1335,6 +1336,9 @@ static int parse_options (char *options, struct super_block *sb, | |||
| 				return 0; | ||||
| 			sbi->s_stripe = option; | ||||
| 			break; | ||||
| 		case Opt_delalloc: | ||||
| 			set_opt(sbi->s_mount_opt, DELALLOC); | ||||
| 			break; | ||||
| 		default: | ||||
| 			printk (KERN_ERR | ||||
| 				"EXT4-fs: Unrecognized mount option \"%s\" " | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Alex Tomas
						Alex Tomas