filesystem-dax: Introduce dax_lock_mapping_entry()

In preparation for implementing support for memory poison (media error)
handling via dax mappings, implement a lock_page() equivalent. Poison
error handling requires rmap and needs guarantees that the page->mapping
association is maintained / valid (inode not freed) for the duration of
the lookup.

In the device-dax case it is sufficient to simply hold a dev_pagemap
reference. In the filesystem-dax case we need to use the entry lock.

Export the entry lock via dax_lock_mapping_entry() that uses
rcu_read_lock() to protect against the inode being freed, and
revalidates the page->mapping association under xa_lock().

Cc: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
parent ae1139ece1
commit c2a7d2a115

 fs/dax.c            | 109 +++++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/dax.h |  13 ++++++
 2 files changed, 116 insertions(+), 6 deletions(-)
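Before the diff itself, a rough sketch of the intended consumer: poison
handling wants to stabilize the page->mapping association before doing an
rmap walk. This is illustrative only; the memory_failure() integration is
not part of this commit, and handle_dax_poison() is a hypothetical name:

	/* Hypothetical caller of the new API, for illustration only. */
	static int handle_dax_poison(unsigned long pfn)
	{
		struct page *page = pfn_to_page(pfn);

		/*
		 * Pin down page->mapping (and thus the inode) for the
		 * duration of the lookup; fails if the association is
		 * already gone.
		 */
		if (!dax_lock_mapping_entry(page))
			return -EBUSY;

		/* ... rmap walk, signal tasks mapping the poisoned pfn ... */

		dax_unlock_mapping_entry(page);
		return 0;
	}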
diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -226,8 +226,8 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
  *
  * Must be called with the i_pages lock held.
  */
-static void *get_unlocked_mapping_entry(struct address_space *mapping,
-					pgoff_t index, void ***slotp)
+static void *__get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp, bool (*wait_fn)(void))
 {
 	void *entry, **slot;
 	struct wait_exceptional_entry_queue ewait;
@@ -237,6 +237,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 	ewait.wait.func = wake_exceptional_entry_func;
 
 	for (;;) {
+		bool revalidate;
+
 		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
 					  &slot);
 		if (!entry ||
@@ -251,14 +253,31 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
 		xa_unlock_irq(&mapping->i_pages);
-		schedule();
+		revalidate = wait_fn();
 		finish_wait(wq, &ewait.wait);
 		xa_lock_irq(&mapping->i_pages);
+		if (revalidate)
+			return ERR_PTR(-EAGAIN);
 	}
 }
 
-static void dax_unlock_mapping_entry(struct address_space *mapping,
-				     pgoff_t index)
+static bool entry_wait(void)
+{
+	schedule();
+	/*
+	 * Never return an ERR_PTR() from
+	 * __get_unlocked_mapping_entry(), just keep looping.
+	 */
+	return false;
+}
+
+static void *get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp)
+{
+	return __get_unlocked_mapping_entry(mapping, index, slotp, entry_wait);
+}
+
+static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
 	void *entry, **slot;
 
@@ -277,7 +296,7 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
 static void put_locked_mapping_entry(struct address_space *mapping,
 		pgoff_t index)
 {
-	dax_unlock_mapping_entry(mapping, index);
+	unlock_mapping_entry(mapping, index);
 }
 
 /*
@@ -374,6 +393,84 @@ static struct page *dax_busy_page(void *entry)
 	return NULL;
 }
 
+static bool entry_wait_revalidate(void)
+{
+	rcu_read_unlock();
+	schedule();
+	rcu_read_lock();
+
+	/*
+	 * Tell __get_unlocked_mapping_entry() to take a break, we need
+	 * to revalidate page->mapping after dropping locks
+	 */
+	return true;
+}
+
+bool dax_lock_mapping_entry(struct page *page)
+{
+	pgoff_t index;
+	struct inode *inode;
+	bool did_lock = false;
+	void *entry = NULL, **slot;
+	struct address_space *mapping;
+
+	rcu_read_lock();
+	for (;;) {
+		mapping = READ_ONCE(page->mapping);
+
+		if (!dax_mapping(mapping))
+			break;
+
+		/*
+		 * In the device-dax case there's no need to lock, a
+		 * struct dev_pagemap pin is sufficient to keep the
+		 * inode alive, and we assume we have dev_pagemap pin
+		 * otherwise we would not have a valid pfn_to_page()
+		 * translation.
+		 */
+		inode = mapping->host;
+		if (S_ISCHR(inode->i_mode)) {
+			did_lock = true;
+			break;
+		}
+
+		xa_lock_irq(&mapping->i_pages);
+		if (mapping != page->mapping) {
+			xa_unlock_irq(&mapping->i_pages);
+			continue;
+		}
+		index = page->index;
+
+		entry = __get_unlocked_mapping_entry(mapping, index, &slot,
+				entry_wait_revalidate);
+		if (!entry) {
+			xa_unlock_irq(&mapping->i_pages);
+			break;
+		} else if (IS_ERR(entry)) {
+			WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN);
+			continue;
+		}
+		lock_slot(mapping, slot);
+		did_lock = true;
+		xa_unlock_irq(&mapping->i_pages);
+		break;
+	}
+	rcu_read_unlock();
+
+	return did_lock;
+}
+
+void dax_unlock_mapping_entry(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+
+	if (S_ISCHR(inode->i_mode))
+		return;
+
+	unlock_mapping_entry(mapping, page->index);
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
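The wait_fn indirection is the heart of the fs/dax.c refactor above: the
shared slot-wait loop no longer hard-codes schedule(), so the plain lock
path can sleep in place (entry_wait) while dax_lock_mapping_entry() drops
the RCU read lock across the sleep and asks for revalidation
(entry_wait_revalidate). A minimal userspace sketch of the same callback
pattern, with purely illustrative names:

	#include <stdbool.h>
	#include <stdio.h>

	/* Analogue of entry_wait(): sleep in place, never abort the loop. */
	static bool plain_wait(void)
	{
		return false;
	}

	/* Analogue of entry_wait_revalidate(): tell the caller to redo its checks. */
	static bool revalidating_wait(void)
	{
		return true;
	}

	/* Analogue of __get_unlocked_mapping_entry()'s retry loop. */
	static int lookup(bool (*wait_fn)(void))
	{
		int tries = 0;

		for (;;) {
			if (++tries == 3)	/* pretend the entry became unlocked */
				return 0;
			if (wait_fn())
				return -1;	/* the -EAGAIN path: caller revalidates */
		}
	}

	int main(void)
	{
		printf("plain: %d\n", lookup(plain_wait));		/* 0 */
		printf("revalidating: %d\n", lookup(revalidating_wait));	/* -1 */
		return 0;
	}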
diff --git a/include/linux/dax.h b/include/linux/dax.h
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -88,6 +88,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
+bool dax_lock_mapping_entry(struct page *page);
+void dax_unlock_mapping_entry(struct page *page);
 #else
 static inline bool bdev_dax_supported(struct block_device *bdev,
 		int blocksize)
@@ -119,6 +121,17 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline bool dax_lock_mapping_entry(struct page *page)
+{
+	if (IS_DAX(page->mapping->host))
+		return true;
+	return false;
+}
+
+static inline void dax_unlock_mapping_entry(struct page *page)
+{
+}
 #endif
 
 int dax_read_lock(void);
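Stepping back, dax_lock_mapping_entry() is an optimistic-read / lock /
revalidate / retry loop: page->mapping is sampled under rcu_read_lock(),
re-checked under xa_lock_irq(), and the whole sequence restarts if a
truncate or unmap raced in. A self-contained userspace approximation of
that shape (pthread stand-ins, all names illustrative):

	#include <pthread.h>
	#include <stdbool.h>

	struct page_like {
		void *mapping;	/* may be changed by a racing "truncate" */
	};

	static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;

	/* Stand-in for dax_lock_mapping_entry()'s revalidation loop. */
	static bool lock_entry(struct page_like *page)
	{
		for (;;) {
			/* Optimistic read, standing in for the RCU-protected load. */
			void *mapping = __atomic_load_n(&page->mapping,
							__ATOMIC_ACQUIRE);
			if (!mapping)
				return false;	/* association already torn down */

			pthread_mutex_lock(&tree_lock);	/* xa_lock_irq() stand-in */
			if (mapping != page->mapping) {
				/* Lost a race with teardown: drop the lock, retry. */
				pthread_mutex_unlock(&tree_lock);
				continue;
			}
			/* ... a lock_slot() equivalent would go here ... */
			pthread_mutex_unlock(&tree_lock);
			return true;
		}
	}

	int main(void)
	{
		struct page_like page = { .mapping = &tree_lock };	/* non-NULL token */

		return lock_entry(&page) ? 0 : 1;
	}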