mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 16:48:26 +02:00 
			
		
		
		
block: use DAX for partition table reads

Avoid populating pagecache when the block device is in DAX mode.
Otherwise these page cache entries collide with the fsync/msync
implementation and break data durability guarantees.

Cc: Jan Kara <jack@suse.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
This commit is contained in:
		
							parent
							
								
									9f4736fe7c
								
							
						
					
					
						commit
						d1a5f2b4d8
					
				
					 3 changed files with 46 additions and 3 deletions
				
			
		|  | @ -16,6 +16,7 @@ | ||||||
| #include <linux/kmod.h> | #include <linux/kmod.h> | ||||||
| #include <linux/ctype.h> | #include <linux/ctype.h> | ||||||
| #include <linux/genhd.h> | #include <linux/genhd.h> | ||||||
|  | #include <linux/dax.h> | ||||||
| #include <linux/blktrace_api.h> | #include <linux/blktrace_api.h> | ||||||
| 
 | 
 | ||||||
| #include "partitions/check.h" | #include "partitions/check.h" | ||||||
|  | @ -550,13 +551,24 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev) | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) | static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n) | ||||||
| { | { | ||||||
| 	struct address_space *mapping = bdev->bd_inode->i_mapping; | 	struct address_space *mapping = bdev->bd_inode->i_mapping; | ||||||
|  | 
 | ||||||
|  | 	return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), | ||||||
|  | 			NULL); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) | ||||||
|  | { | ||||||
| 	struct page *page; | 	struct page *page; | ||||||
| 
 | 
 | ||||||
| 	page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), | 	/* don't populate page cache for dax capable devices */ | ||||||
| 				 NULL); | 	if (IS_DAX(bdev->bd_inode)) | ||||||
|  | 		page = read_dax_sector(bdev, n); | ||||||
|  | 	else | ||||||
|  | 		page = read_pagecache_sector(bdev, n); | ||||||
|  | 
 | ||||||
| 	if (!IS_ERR(page)) { | 	if (!IS_ERR(page)) { | ||||||
| 		if (PageError(page)) | 		if (PageError(page)) | ||||||
| 			goto fail; | 			goto fail; | ||||||
|  |  | ||||||
							
								
								
									
										20
									
								
								fs/dax.c
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								fs/dax.c
									
									
									
									
									
								
							|  | @ -58,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev, | ||||||
| 	blk_queue_exit(bdev->bd_queue); | 	blk_queue_exit(bdev->bd_queue); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | struct page *read_dax_sector(struct block_device *bdev, sector_t n) | ||||||
|  | { | ||||||
|  | 	struct page *page = alloc_pages(GFP_KERNEL, 0); | ||||||
|  | 	struct blk_dax_ctl dax = { | ||||||
|  | 		.size = PAGE_SIZE, | ||||||
|  | 		.sector = n & ~((((int) PAGE_SIZE) / 512) - 1), | ||||||
|  | 	}; | ||||||
|  | 	long rc; | ||||||
|  | 
 | ||||||
|  | 	if (!page) | ||||||
|  | 		return ERR_PTR(-ENOMEM); | ||||||
|  | 
 | ||||||
|  | 	rc = dax_map_atomic(bdev, &dax); | ||||||
|  | 	if (rc < 0) | ||||||
|  | 		return ERR_PTR(rc); | ||||||
|  | 	memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE); | ||||||
|  | 	dax_unmap_atomic(bdev, &dax); | ||||||
|  | 	return page; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * dax_clear_blocks() is called from within transaction context from XFS, |  * dax_clear_blocks() is called from within transaction context from XFS, | ||||||
|  * and hence this means the stack from this point must follow GFP_NOFS |  * and hence this means the stack from this point must follow GFP_NOFS | ||||||
|  |  | ||||||
|  | @ -14,6 +14,17 @@ int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, | ||||||
| 		dax_iodone_t); | 		dax_iodone_t); | ||||||
| int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, | int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, | ||||||
| 		dax_iodone_t); | 		dax_iodone_t); | ||||||
|  | 
 | ||||||
#ifdef CONFIG_FS_DAX
struct page *read_dax_sector(struct block_device *bdev, sector_t n);
#else
/*
 * Stub for !CONFIG_FS_DAX builds: DAX reads are impossible, so report
 * -ENXIO.  Callers are expected to check IS_DAX() first and fall back
 * to the page cache path.
 */
static inline struct page *read_dax_sector(struct block_device *bdev,
		sector_t n)
{
	return ERR_PTR(-ENXIO);
}
#endif
|  | 
 | ||||||
| #ifdef CONFIG_TRANSPARENT_HUGEPAGE | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||||||
| int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, | int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, | ||||||
| 				unsigned int flags, get_block_t, dax_iodone_t); | 				unsigned int flags, get_block_t, dax_iodone_t); | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Dan Williams
						Dan Williams