mirror of https://github.com/torvalds/linux.git
	filesystem-dax: Introduce dax_lock_mapping_entry()
In preparation for implementing support for memory poison (media error) handling via dax mappings, implement a lock_page() equivalent. Poison error handling requires rmap and needs guarantees that the page->mapping association is maintained / valid (inode not freed) for the duration of the lookup.

In the device-dax case it is sufficient to simply hold a dev_pagemap reference. In the filesystem-dax case we need to use the entry lock.

Export the entry lock via dax_lock_mapping_entry() that uses rcu_read_lock() to protect against the inode being freed, and revalidates the page->mapping association under xa_lock().

Cc: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
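For context, the expected consumer of the new entry lock is the memory poison handling path: before walking rmap for a poisoned dax page, the handler pins the page->mapping association with dax_lock_mapping_entry() and drops it once the unmapping work is done. A minimal caller sketch follows; it is not part of this commit, and both dax_handle_poisoned_page() and collect_procs_and_unmap() are hypothetical stand-ins for the real memory_failure() machinery.

	/* Hypothetical caller sketch, not from this commit. */
	static int dax_handle_poisoned_page(struct page *page)
	{
		/*
		 * Pin page->mapping (and keep the inode alive) for the
		 * duration of the rmap lookup; bail out if the mapping
		 * was torn down before we could take the entry lock.
		 */
		if (!dax_lock_mapping_entry(page))
			return -EBUSY;

		collect_procs_and_unmap(page);	/* hypothetical rmap walk */

		dax_unlock_mapping_entry(page);
		return 0;
	}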
This commit is contained in:
	parent ae1139ece1
	commit c2a7d2a115

2 changed files with 116 additions and 6 deletions
	fs/dax.c | 109
fs/dax.c
@@ -226,8 +226,8 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
  *
  * Must be called with the i_pages lock held.
  */
-static void *get_unlocked_mapping_entry(struct address_space *mapping,
-					pgoff_t index, void ***slotp)
+static void *__get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp, bool (*wait_fn)(void))
 {
 	void *entry, **slot;
 	struct wait_exceptional_entry_queue ewait;
@@ -237,6 +237,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 	ewait.wait.func = wake_exceptional_entry_func;
 
 	for (;;) {
+		bool revalidate;
+
 		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
 					  &slot);
 		if (!entry ||
@@ -251,14 +253,31 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
 		xa_unlock_irq(&mapping->i_pages);
-		schedule();
+		revalidate = wait_fn();
 		finish_wait(wq, &ewait.wait);
 		xa_lock_irq(&mapping->i_pages);
+		if (revalidate)
+			return ERR_PTR(-EAGAIN);
 	}
 }
 
-static void dax_unlock_mapping_entry(struct address_space *mapping,
-				     pgoff_t index)
+static bool entry_wait(void)
+{
+	schedule();
+	/*
+	 * Never return an ERR_PTR() from
+	 * __get_unlocked_mapping_entry(), just keep looping.
+	 */
+	return false;
+}
+
+static void *get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp)
+{
+	return __get_unlocked_mapping_entry(mapping, index, slotp, entry_wait);
+}
+
+static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
 	void *entry, **slot;
 
@@ -277,7 +296,7 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
 static void put_locked_mapping_entry(struct address_space *mapping,
 		pgoff_t index)
 {
-	dax_unlock_mapping_entry(mapping, index);
+	unlock_mapping_entry(mapping, index);
 }
 
 /*
@@ -374,6 +393,84 @@ static struct page *dax_busy_page(void *entry)
 	return NULL;
 }
 
+static bool entry_wait_revalidate(void)
+{
+	rcu_read_unlock();
+	schedule();
+	rcu_read_lock();
+
+	/*
+	 * Tell __get_unlocked_mapping_entry() to take a break, we need
+	 * to revalidate page->mapping after dropping locks
+	 */
+	return true;
+}
+
+bool dax_lock_mapping_entry(struct page *page)
+{
+	pgoff_t index;
+	struct inode *inode;
+	bool did_lock = false;
+	void *entry = NULL, **slot;
+	struct address_space *mapping;
+
+	rcu_read_lock();
+	for (;;) {
+		mapping = READ_ONCE(page->mapping);
+
+		if (!dax_mapping(mapping))
+			break;
+
+		/*
+		 * In the device-dax case there's no need to lock, a
+		 * struct dev_pagemap pin is sufficient to keep the
+		 * inode alive, and we assume we have dev_pagemap pin
+		 * otherwise we would not have a valid pfn_to_page()
+		 * translation.
+		 */
+		inode = mapping->host;
+		if (S_ISCHR(inode->i_mode)) {
+			did_lock = true;
+			break;
+		}
+
+		xa_lock_irq(&mapping->i_pages);
+		if (mapping != page->mapping) {
+			xa_unlock_irq(&mapping->i_pages);
+			continue;
+		}
+		index = page->index;
+
+		entry = __get_unlocked_mapping_entry(mapping, index, &slot,
+				entry_wait_revalidate);
+		if (!entry) {
+			xa_unlock_irq(&mapping->i_pages);
+			break;
+		} else if (IS_ERR(entry)) {
+			WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN);
+			continue;
+		}
+		lock_slot(mapping, slot);
+		did_lock = true;
+		xa_unlock_irq(&mapping->i_pages);
+		break;
+	}
+	rcu_read_unlock();
+
+	return did_lock;
+}
+
+void dax_unlock_mapping_entry(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+
+	if (S_ISCHR(inode->i_mode))
+		return;
+
+	unlock_mapping_entry(mapping, page->index);
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
include/linux/dax.h
@@ -88,6 +88,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
+bool dax_lock_mapping_entry(struct page *page);
+void dax_unlock_mapping_entry(struct page *page);
 #else
 static inline bool bdev_dax_supported(struct block_device *bdev,
 		int blocksize)
@@ -119,6 +121,17 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping,
 {
 	return -EOPNOTSUPP;
 }
 
+static inline bool dax_lock_mapping_entry(struct page *page)
+{
+	if (IS_DAX(page->mapping->host))
+		return true;
+	return false;
+}
+
+static inline void dax_unlock_mapping_entry(struct page *page)
+{
+}
 #endif
 
 int dax_read_lock(void);
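The entry_wait() / entry_wait_revalidate() split gives __get_unlocked_mapping_entry() a small callback contract: the wait function returns false to keep the lookup loop waiting in place, or true to make the lookup return ERR_PTR(-EAGAIN) so the caller can drop its locks, revalidate page->mapping, and retry from the top. The following stand-alone sketch (plain user-space C with hypothetical names, only to illustrate that contract, not kernel code) shows the same shape:

	#include <stdbool.h>
	#include <stdio.h>

	static int retry_token;			/* plays the role of ERR_PTR(-EAGAIN) */
	#define RETRY ((void *)&retry_token)

	/*
	 * Hypothetical analogue of __get_unlocked_mapping_entry(): spin until
	 * the entry is unlocked, letting wait_fn() decide whether the caller
	 * should instead back out and revalidate its state.
	 */
	static void *lookup_unlocked(const bool *locked, void *entry,
				     bool (*wait_fn)(void))
	{
		for (;;) {
			if (!*locked)
				return entry;
			if (wait_fn())
				return RETRY;	/* caller revalidates and retries */
		}
	}

	/* entry_wait() analogue: never ask the caller to restart. */
	static bool wait_in_place(void) { return false; }

	/* entry_wait_revalidate() analogue: always bounce back to the caller. */
	static bool wait_then_revalidate(void) { return true; }

	int main(void)
	{
		bool locked = true;
		int value = 42;

		if (lookup_unlocked(&locked, &value, wait_then_revalidate) == RETRY)
			printf("locked: caller drops locks, revalidates, retries\n");

		locked = false;
		printf("unlocked: entry = %d\n",
		       *(int *)lookup_unlocked(&locked, &value, wait_in_place));
		return 0;
	}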