	filesystem-dax: Introduce dax_lock_mapping_entry()
In preparation for implementing support for memory poison (media error) handling via dax mappings, implement a lock_page() equivalent. Poison error handling requires rmap and needs guarantees that the page->mapping association is maintained / valid (inode not freed) for the duration of the lookup.

In the device-dax case it is sufficient to simply hold a dev_pagemap reference. In the filesystem-dax case we need to use the entry lock.

Export the entry lock via dax_lock_mapping_entry() that uses rcu_read_lock() to protect against the inode being freed, and revalidates the page->mapping association under xa_lock().

Cc: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
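Usage note (illustrative only, not part of this patch): a minimal sketch of how a memory error handler might take the new entry lock before inspecting the page's mapping. The function name handle_dax_page_error() and the -EBUSY return are assumptions made for the example; only dax_lock_mapping_entry() and dax_unlock_mapping_entry() come from this change.

	#include <linux/dax.h>
	#include <linux/mm.h>

	/*
	 * Hypothetical caller: pin the page->mapping association of a dax
	 * page while handling a media error, so the inode cannot be freed
	 * and page->index stays meaningful for the duration of the lookup.
	 */
	static int handle_dax_page_error(struct page *page)
	{
		/*
		 * Returns false if the page is no longer part of a dax
		 * mapping; returns true with the radix tree entry locked
		 * (or, for device-dax, with no locking required).
		 */
		if (!dax_lock_mapping_entry(page))
			return -EBUSY;

		/* page->mapping and page->index are stable here. */

		dax_unlock_mapping_entry(page);
		return 0;
	}

The patch only introduces the locking primitive; wiring it into the actual poison-handling path is left to follow-on changes.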
parent ae1139ece1
commit c2a7d2a115

2 changed files with 116 additions and 6 deletions

 fs/dax.c | 109
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -226,8 +226,8 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
  *
  * Must be called with the i_pages lock held.
  */
-static void *get_unlocked_mapping_entry(struct address_space *mapping,
-					pgoff_t index, void ***slotp)
+static void *__get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp, bool (*wait_fn)(void))
 {
 	void *entry, **slot;
 	struct wait_exceptional_entry_queue ewait;
@@ -237,6 +237,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 	ewait.wait.func = wake_exceptional_entry_func;
 
 	for (;;) {
+		bool revalidate;
+
 		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
 					  &slot);
 		if (!entry ||
@@ -251,14 +253,31 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
 		xa_unlock_irq(&mapping->i_pages);
-		schedule();
+		revalidate = wait_fn();
 		finish_wait(wq, &ewait.wait);
 		xa_lock_irq(&mapping->i_pages);
+		if (revalidate)
+			return ERR_PTR(-EAGAIN);
 	}
 }
 
-static void dax_unlock_mapping_entry(struct address_space *mapping,
-				     pgoff_t index)
+static bool entry_wait(void)
+{
+	schedule();
+	/*
+	 * Never return an ERR_PTR() from
+	 * __get_unlocked_mapping_entry(), just keep looping.
+	 */
+	return false;
+}
+
+static void *get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp)
+{
+	return __get_unlocked_mapping_entry(mapping, index, slotp, entry_wait);
+}
+
+static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
 	void *entry, **slot;
 
@@ -277,7 +296,7 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
 static void put_locked_mapping_entry(struct address_space *mapping,
 		pgoff_t index)
 {
-	dax_unlock_mapping_entry(mapping, index);
+	unlock_mapping_entry(mapping, index);
 }
 
 /*
@@ -374,6 +393,84 @@ static struct page *dax_busy_page(void *entry)
 	return NULL;
 }
 
+static bool entry_wait_revalidate(void)
+{
+	rcu_read_unlock();
+	schedule();
+	rcu_read_lock();
+
+	/*
+	 * Tell __get_unlocked_mapping_entry() to take a break, we need
+	 * to revalidate page->mapping after dropping locks
+	 */
+	return true;
+}
+
+bool dax_lock_mapping_entry(struct page *page)
+{
+	pgoff_t index;
+	struct inode *inode;
+	bool did_lock = false;
+	void *entry = NULL, **slot;
+	struct address_space *mapping;
+
+	rcu_read_lock();
+	for (;;) {
+		mapping = READ_ONCE(page->mapping);
+
+		if (!dax_mapping(mapping))
+			break;
+
+		/*
+		 * In the device-dax case there's no need to lock, a
+		 * struct dev_pagemap pin is sufficient to keep the
+		 * inode alive, and we assume we have dev_pagemap pin
+		 * otherwise we would not have a valid pfn_to_page()
+		 * translation.
+		 */
+		inode = mapping->host;
+		if (S_ISCHR(inode->i_mode)) {
+			did_lock = true;
+			break;
+		}
+
+		xa_lock_irq(&mapping->i_pages);
+		if (mapping != page->mapping) {
+			xa_unlock_irq(&mapping->i_pages);
+			continue;
+		}
+		index = page->index;
+
+		entry = __get_unlocked_mapping_entry(mapping, index, &slot,
+				entry_wait_revalidate);
+		if (!entry) {
+			xa_unlock_irq(&mapping->i_pages);
+			break;
+		} else if (IS_ERR(entry)) {
+			WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN);
+			continue;
+		}
+		lock_slot(mapping, slot);
+		did_lock = true;
+		xa_unlock_irq(&mapping->i_pages);
+		break;
+	}
+	rcu_read_unlock();
+
+	return did_lock;
+}
+
+void dax_unlock_mapping_entry(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+
+	if (S_ISCHR(inode->i_mode))
+		return;
+
+	unlock_mapping_entry(mapping, page->index);
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -88,6 +88,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
+bool dax_lock_mapping_entry(struct page *page);
+void dax_unlock_mapping_entry(struct page *page);
 #else
 static inline bool bdev_dax_supported(struct block_device *bdev,
 		int blocksize)
@@ -119,6 +121,17 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline bool dax_lock_mapping_entry(struct page *page)
+{
+	if (IS_DAX(page->mapping->host))
+		return true;
+	return false;
+}
+
+static inline void dax_unlock_mapping_entry(struct page *page)
+{
+}
 #endif
 
 int dax_read_lock(void);