dax: Convert dax_insert_pfn_mkwrite to XArray

Add some XArray-based helper functions to replace the radix tree based
metaphors currently in use.  The biggest change is that converted code
doesn't see its own lock bit; get_unlocked_entry() always returns an
entry with the lock bit clear.  So we don't have to mess around loading
the current entry and clearing the lock bit; we can just store the
unlocked entry that we already have.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
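As a minimal sketch of the pattern this enables (not part of the commit;
it uses only the helpers added below, with mapping and index standing in
for a real address_space and page offset — the rewritten
dax_insert_pfn_mkwrite() follows the same shape):

	/* Sketch: lock/unlock a DAX entry with the new XArray helpers.
	 * Because get_unlocked_entry() returns the entry with DAX_LOCKED
	 * clear, unlocking is a plain store of the value we still hold. */
	XA_STATE(xas, &mapping->i_pages, index);
	void *entry;

	xas_lock_irq(&xas);
	entry = get_unlocked_entry(&xas);	/* lock bit already clear */
	dax_lock_entry(&xas, entry);		/* stores entry | DAX_LOCKED */
	xas_unlock_irq(&xas);

	/* ... operate on the entry without holding the xa_lock ... */

	dax_unlock_entry(&xas, entry);	/* just store the unlocked entry */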
This commit is contained in:

    parent ec4907ff69
    commit cfc93c6c6c

1 changed file with 117 additions and 32 deletions
fs/dax.c  (149 changed lines)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -38,6 +38,17 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/fs_dax.h>
 
+static inline unsigned int pe_order(enum page_entry_size pe_size)
+{
+	if (pe_size == PE_SIZE_PTE)
+		return PAGE_SHIFT - PAGE_SHIFT;
+	if (pe_size == PE_SIZE_PMD)
+		return PMD_SHIFT - PAGE_SHIFT;
+	if (pe_size == PE_SIZE_PUD)
+		return PUD_SHIFT - PAGE_SHIFT;
+	return ~0;
+}
+
 /* We choose 4096 entries - same as per-zone page wait tables */
 #define DAX_WAIT_TABLE_BITS 12
 #define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
@@ -46,6 +57,9 @@
 #define PG_PMD_COLOUR	((PMD_SIZE >> PAGE_SHIFT) - 1)
 #define PG_PMD_NR	(PMD_SIZE >> PAGE_SHIFT)
 
+/* The order of a PMD entry */
+#define PMD_ORDER	(PMD_SHIFT - PAGE_SHIFT)
+
 static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];
 
 static int __init init_dax_wait_table(void)
@@ -85,10 +99,15 @@ static void *dax_make_locked(unsigned long pfn, unsigned long flags)
 			DAX_LOCKED);
 }
 
+static bool dax_is_locked(void *entry)
+{
+	return xa_to_value(entry) & DAX_LOCKED;
+}
+
 static unsigned int dax_entry_order(void *entry)
 {
 	if (xa_to_value(entry) & DAX_PMD)
-		return PMD_SHIFT - PAGE_SHIFT;
+		return PMD_ORDER;
 	return 0;
 }
 
@@ -181,6 +200,81 @@ static void dax_wake_mapping_entry_waiter(struct xarray *xa,
 		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
 }
 
+static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
+{
+	return dax_wake_mapping_entry_waiter(xas->xa, xas->xa_index, entry,
+								wake_all);
+}
+
+/*
+ * Look up entry in page cache, wait for it to become unlocked if it
+ * is a DAX entry and return it.  The caller must subsequently call
+ * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry()
+ * if it did.
+ *
+ * Must be called with the i_pages lock held.
+ */
+static void *get_unlocked_entry(struct xa_state *xas)
+{
+	void *entry;
+	struct wait_exceptional_entry_queue ewait;
+	wait_queue_head_t *wq;
+
+	init_wait(&ewait.wait);
+	ewait.wait.func = wake_exceptional_entry_func;
+
+	for (;;) {
+		entry = xas_load(xas);
+		if (!entry || xa_is_internal(entry) ||
+				WARN_ON_ONCE(!xa_is_value(entry)) ||
+				!dax_is_locked(entry))
+			return entry;
+
+		wq = dax_entry_waitqueue(xas->xa, xas->xa_index, entry,
+				&ewait.key);
+		prepare_to_wait_exclusive(wq, &ewait.wait,
+					  TASK_UNINTERRUPTIBLE);
+		xas_unlock_irq(xas);
+		xas_reset(xas);
+		schedule();
+		finish_wait(wq, &ewait.wait);
+		xas_lock_irq(xas);
+	}
+}
+
+static void put_unlocked_entry(struct xa_state *xas, void *entry)
+{
+	/* If we were the only waiter woken, wake the next one */
+	if (entry)
+		dax_wake_entry(xas, entry, false);
+}
+
+/*
+ * We used the xa_state to get the entry, but then we locked the entry and
+ * dropped the xa_lock, so we know the xa_state is stale and must be reset
+ * before use.
+ */
+static void dax_unlock_entry(struct xa_state *xas, void *entry)
+{
+	void *old;
+
+	xas_reset(xas);
+	xas_lock_irq(xas);
+	old = xas_store(xas, entry);
+	xas_unlock_irq(xas);
+	BUG_ON(!dax_is_locked(old));
+	dax_wake_entry(xas, entry, false);
+}
+
+/*
+ * Return: The entry stored at this location before it was locked.
+ */
+static void *dax_lock_entry(struct xa_state *xas, void *entry)
+{
+	unsigned long v = xa_to_value(entry);
+	return xas_store(xas, xa_mk_value(v | DAX_LOCKED));
+}
+
 /*
  * Check whether the given slot is locked.  Must be called with the i_pages
  * lock held.
@@ -1728,50 +1822,46 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault);
 /*
  * dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables
  * @vmf: The description of the fault
- * @pe_size: Size of entry to be inserted
  * @pfn: PFN to insert
+ * @order: Order of entry to insert.
  *
  * This function inserts a writeable PTE or PMD entry into the page tables
  * for an mmaped DAX file.  It also marks the page cache entry as dirty.
  */
-static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf,
-				  enum page_entry_size pe_size,
-				  pfn_t pfn)
+static vm_fault_t
+dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
-	void *entry, **slot;
-	pgoff_t index = vmf->pgoff;
+	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
+	void *entry;
 	vm_fault_t ret;
 
-	xa_lock_irq(&mapping->i_pages);
-	entry = get_unlocked_mapping_entry(mapping, index, &slot);
+	xas_lock_irq(&xas);
+	entry = get_unlocked_entry(&xas);
 	/* Did we race with someone splitting entry or so? */
 	if (!entry ||
-	    (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) ||
-	    (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) {
-		put_unlocked_mapping_entry(mapping, index, entry);
-		xa_unlock_irq(&mapping->i_pages);
+	    (order == 0 && !dax_is_pte_entry(entry)) ||
+	    (order == PMD_ORDER && (xa_is_internal(entry) ||
+				    !dax_is_pmd_entry(entry)))) {
+		put_unlocked_entry(&xas, entry);
+		xas_unlock_irq(&xas);
 		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
 						      VM_FAULT_NOPAGE);
 		return VM_FAULT_NOPAGE;
 	}
-	radix_tree_tag_set(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY);
-	entry = lock_slot(mapping, slot);
-	xa_unlock_irq(&mapping->i_pages);
-	switch (pe_size) {
-	case PE_SIZE_PTE:
+	xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
+	dax_lock_entry(&xas, entry);
+	xas_unlock_irq(&xas);
+	if (order == 0)
 		ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
-		break;
 #ifdef CONFIG_FS_DAX_PMD
-	case PE_SIZE_PMD:
+	else if (order == PMD_ORDER)
 		ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
 			pfn, true);
-		break;
 #endif
-	default:
+	else
 		ret = VM_FAULT_FALLBACK;
-	}
-	put_locked_mapping_entry(mapping, index);
+	dax_unlock_entry(&xas, entry);
 	trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
 	return ret;
 }
@@ -1791,17 +1881,12 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
 {
 	int err;
 	loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
-	size_t len = 0;
+	unsigned int order = pe_order(pe_size);
+	size_t len = PAGE_SIZE << order;
 
-	if (pe_size == PE_SIZE_PTE)
-		len = PAGE_SIZE;
-	else if (pe_size == PE_SIZE_PMD)
-		len = PMD_SIZE;
-	else
-		WARN_ON_ONCE(1);
 	err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
 	if (err)
 		return VM_FAULT_SIGBUS;
-	return dax_insert_pfn_mkwrite(vmf, pe_size, pfn);
+	return dax_insert_pfn_mkwrite(vmf, pfn, order);
 }
 EXPORT_SYMBOL_GPL(dax_finish_sync_fault);