dax,ext2: replace the XIP page fault handler with the DAX page fault handler

Instead of calling aops->get_xip_mem from the fault handler, the filesystem
passes a get_block_t that is used to find the appropriate blocks.

This requires that all architectures implement copy_user_page().  At the time
of writing, mips and arm do not.  Patches exist and are in progress.

[akpm@linux-foundation.org: remap_file_pages went away]
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Andreas Dilger <andreas.dilger@intel.com>
Cc: Boaz Harrosh <boaz@plexistor.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
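For reference, the get_block_t callback that the filesystem supplies is the
standard block-mapping prototype declared in include/linux/fs.h; the new fault
handler below calls it with create == 0 to look up an existing block and calls
it again with create == 1 when a write fault needs a block allocated.
Reproduced here as a sketch for readers unfamiliar with the type:

	/*
	 * Block-mapping callback passed to dax_fault(): translates a file
	 * block number into a mapped buffer_head describing the on-disk
	 * (or on-pmem) block, optionally allocating it when create != 0.
	 */
	typedef int (get_block_t)(struct inode *inode, sector_t iblock,
				  struct buffer_head *bh_result, int create);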
parent 289c6aedac
commit f7ca90b160

4 changed files with 276 additions and 209 deletions

fs/dax.c | 241
@@ -19,9 +19,13 @@
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/uio.h>
#include <linux/vmstat.h>

int dax_clear_blocks(struct inode *inode, sector_t block, long size)
{
@@ -221,3 +225,240 @@ ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode,
	return retval;
}
EXPORT_SYMBOL_GPL(dax_do_io);

/*
 * The user has performed a load from a hole in the file.  Allocating
 * a new page in the file would cause excessive storage usage for
 * workloads with sparse files.  We allocate a page cache page instead.
 * We'll kick it out of the page cache if it's ever written to,
 * otherwise it will simply fall out of the page cache under memory
 * pressure without ever having been dirtied.
 */
static int dax_load_hole(struct address_space *mapping, struct page *page,
							struct vm_fault *vmf)
{
	unsigned long size;
	struct inode *inode = mapping->host;
	if (!page)
		page = find_or_create_page(mapping, vmf->pgoff,
						GFP_KERNEL | __GFP_ZERO);
	if (!page)
		return VM_FAULT_OOM;
	/* Recheck i_size under page lock to avoid truncate race */
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (vmf->pgoff >= size) {
		unlock_page(page);
		page_cache_release(page);
		return VM_FAULT_SIGBUS;
	}

	vmf->page = page;
	return VM_FAULT_LOCKED;
}

static int copy_user_bh(struct page *to, struct buffer_head *bh,
			unsigned blkbits, unsigned long vaddr)
{
	void *vfrom, *vto;
	if (dax_get_addr(bh, &vfrom, blkbits) < 0)
		return -EIO;
	vto = kmap_atomic(to);
	copy_user_page(vto, vfrom, vaddr, to);
	kunmap_atomic(vto);
	return 0;
}

static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
			struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct address_space *mapping = inode->i_mapping;
	sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	void *addr;
	unsigned long pfn;
	pgoff_t size;
	int error;

	i_mmap_lock_read(mapping);

	/*
	 * Check truncate didn't happen while we were allocating a block.
	 * If it did, this block may or may not be still allocated to the
	 * file.  We can't tell the filesystem to free it because we can't
	 * take i_mutex here.  In the worst case, the file still has blocks
	 * allocated past the end of the file.
	 */
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (unlikely(vmf->pgoff >= size)) {
		error = -EIO;
		goto out;
	}

	error = bdev_direct_access(bh->b_bdev, sector, &addr, &pfn, bh->b_size);
	if (error < 0)
		goto out;
	if (error < PAGE_SIZE) {
		error = -EIO;
		goto out;
	}

	if (buffer_unwritten(bh) || buffer_new(bh))
		clear_page(addr);

	error = vm_insert_mixed(vma, vaddr, pfn);

 out:
	i_mmap_unlock_read(mapping);

	if (bh->b_end_io)
		bh->b_end_io(bh, 1);

	return error;
}

static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
			get_block_t get_block)
{
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct page *page;
	struct buffer_head bh;
	unsigned long vaddr = (unsigned long)vmf->virtual_address;
	unsigned blkbits = inode->i_blkbits;
	sector_t block;
	pgoff_t size;
	int error;
	int major = 0;

	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (vmf->pgoff >= size)
		return VM_FAULT_SIGBUS;

	memset(&bh, 0, sizeof(bh));
	block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
	bh.b_size = PAGE_SIZE;

 repeat:
	page = find_get_page(mapping, vmf->pgoff);
	if (page) {
		if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
			page_cache_release(page);
			return VM_FAULT_RETRY;
		}
		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			page_cache_release(page);
			goto repeat;
		}
		size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
		if (unlikely(vmf->pgoff >= size)) {
			/*
			 * We have a struct page covering a hole in the file
			 * from a read fault and we've raced with a truncate
			 */
			error = -EIO;
			goto unlock_page;
		}
	}

	error = get_block(inode, block, &bh, 0);
	if (!error && (bh.b_size < PAGE_SIZE))
		error = -EIO;		/* fs corruption? */
	if (error)
		goto unlock_page;

	if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
		if (vmf->flags & FAULT_FLAG_WRITE) {
			error = get_block(inode, block, &bh, 1);
			count_vm_event(PGMAJFAULT);
			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
			major = VM_FAULT_MAJOR;
			if (!error && (bh.b_size < PAGE_SIZE))
				error = -EIO;
			if (error)
				goto unlock_page;
		} else {
			return dax_load_hole(mapping, page, vmf);
		}
	}

	if (vmf->cow_page) {
		struct page *new_page = vmf->cow_page;
		if (buffer_written(&bh))
			error = copy_user_bh(new_page, &bh, blkbits, vaddr);
		else
			clear_user_highpage(new_page, vaddr);
		if (error)
			goto unlock_page;
		vmf->page = page;
		if (!page) {
			i_mmap_lock_read(mapping);
			/* Check we didn't race with truncate */
			size = (i_size_read(inode) + PAGE_SIZE - 1) >>
								PAGE_SHIFT;
			if (vmf->pgoff >= size) {
				i_mmap_unlock_read(mapping);
				error = -EIO;
				goto out;
			}
		}
		return VM_FAULT_LOCKED;
	}

	/* Check we didn't race with a read fault installing a new page */
	if (!page && major)
		page = find_lock_page(mapping, vmf->pgoff);

	if (page) {
		unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
							PAGE_CACHE_SIZE, 0);
		delete_from_page_cache(page);
		unlock_page(page);
		page_cache_release(page);
	}

	error = dax_insert_mapping(inode, &bh, vma, vmf);

 out:
	if (error == -ENOMEM)
		return VM_FAULT_OOM | major;
	/* -EBUSY is fine, somebody else faulted on the same PTE */
	if ((error < 0) && (error != -EBUSY))
		return VM_FAULT_SIGBUS | major;
	return VM_FAULT_NOPAGE | major;

 unlock_page:
	if (page) {
		unlock_page(page);
		page_cache_release(page);
	}
	goto out;
}

/**
 * dax_fault - handle a page fault on a DAX file
 * @vma: The virtual memory area where the fault occurred
 * @vmf: The description of the fault
 * @get_block: The filesystem method used to translate file offsets to blocks
 *
 * When a page fault occurs, filesystems may call this helper in their
 * fault handler for DAX files.
 */
int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
			get_block_t get_block)
{
	int result;
	struct super_block *sb = file_inode(vma->vm_file)->i_sb;

	if (vmf->flags & FAULT_FLAG_WRITE) {
		sb_start_pagefault(sb);
		file_update_time(vma->vm_file);
	}
	result = do_dax_fault(vma, vmf, get_block);
	if (vmf->flags & FAULT_FLAG_WRITE)
		sb_end_pagefault(sb);

	return result;
}
EXPORT_SYMBOL_GPL(dax_fault);
fs/ext2/file.c

@@ -25,6 +25,36 @@
#include "xattr.h"
#include "acl.h"

#ifdef CONFIG_EXT2_FS_XIP
static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	return dax_fault(vma, vmf, ext2_get_block);
}

static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	return dax_mkwrite(vma, vmf, ext2_get_block);
}

static const struct vm_operations_struct ext2_dax_vm_ops = {
	.fault		= ext2_dax_fault,
	.page_mkwrite	= ext2_dax_mkwrite,
};

static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	if (!IS_DAX(file_inode(file)))
		return generic_file_mmap(file, vma);

	file_accessed(file);
	vma->vm_ops = &ext2_dax_vm_ops;
	vma->vm_flags |= VM_MIXEDMAP;
	return 0;
}
#else
#define ext2_file_mmap	generic_file_mmap
#endif

/*
 * Called when filp is released. This happens when all file descriptors
 * for a single struct file are closed. Note that different open() calls

@@ -70,7 +100,7 @@ const struct file_operations ext2_file_operations = {
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext2_compat_ioctl,
#endif
-	.mmap		= generic_file_mmap,
+	.mmap		= ext2_file_mmap,
	.open		= dquot_file_open,
	.release	= ext2_release_file,
	.fsync		= ext2_fsync,

@@ -89,7 +119,7 @@ const struct file_operations ext2_xip_file_operations = {
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext2_compat_ioctl,
#endif
-	.mmap		= xip_file_mmap,
+	.mmap		= ext2_file_mmap,
	.open		= dquot_file_open,
	.release	= ext2_release_file,
	.fsync		= ext2_fsync,
include/linux/fs.h

@@ -51,6 +51,7 @@ struct swap_info_struct;
struct seq_file;
struct workqueue_struct;
struct iov_iter;
struct vm_fault;

extern void __init inode_init(void);
extern void __init inode_init_early(void);

@@ -2590,9 +2591,10 @@ extern int nonseekable_open(struct inode * inode, struct file * filp);
ssize_t dax_do_io(int rw, struct kiocb *, struct inode *, struct iov_iter *,
		loff_t, get_block_t, dio_iodone_t, int flags);
int dax_clear_blocks(struct inode *, sector_t block, long size);
int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
#define dax_mkwrite(vma, vmf, gb)	dax_fault(vma, vmf, gb)

#ifdef CONFIG_FS_XIP
extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma);
extern int xip_truncate_page(struct address_space *mapping, loff_t from);
#else
static inline int xip_truncate_page(struct address_space *mapping, loff_t from)

mm/filemap_xip.c | 206
@@ -22,212 +22,6 @@
#include <asm/tlbflush.h>
#include <asm/io.h>

/*
 * We do use our own empty page to avoid interference with other users
 * of ZERO_PAGE(), such as /dev/zero
 */
static DEFINE_MUTEX(xip_sparse_mutex);
static seqcount_t xip_sparse_seq = SEQCNT_ZERO(xip_sparse_seq);
static struct page *__xip_sparse_page;

/* called under xip_sparse_mutex */
static struct page *xip_sparse_page(void)
{
	if (!__xip_sparse_page) {
		struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);

		if (page)
			__xip_sparse_page = page;
	}
	return __xip_sparse_page;
}

/*
 * __xip_unmap is invoked from xip_unmap and xip_write
 *
 * This function walks all vmas of the address_space and unmaps the
 * __xip_sparse_page when found at pgoff.
 */
static void __xip_unmap(struct address_space * mapping, unsigned long pgoff)
{
	struct vm_area_struct *vma;
	struct page *page;
	unsigned count;
	int locked = 0;

	count = read_seqcount_begin(&xip_sparse_seq);

	page = __xip_sparse_page;
	if (!page)
		return;

retry:
	i_mmap_lock_read(mapping);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
		pte_t *pte, pteval;
		spinlock_t *ptl;
		struct mm_struct *mm = vma->vm_mm;
		unsigned long address = vma->vm_start +
			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);

		BUG_ON(address < vma->vm_start || address >= vma->vm_end);
		pte = page_check_address(page, mm, address, &ptl, 1);
		if (pte) {
			/* Nuke the page table entry. */
			flush_cache_page(vma, address, pte_pfn(*pte));
			pteval = ptep_clear_flush(vma, address, pte);
			page_remove_rmap(page);
			dec_mm_counter(mm, MM_FILEPAGES);
			BUG_ON(pte_dirty(pteval));
			pte_unmap_unlock(pte, ptl);
			/* must invalidate_page _before_ freeing the page */
			mmu_notifier_invalidate_page(mm, address);
			page_cache_release(page);
		}
	}
	i_mmap_unlock_read(mapping);

	if (locked) {
		mutex_unlock(&xip_sparse_mutex);
	} else if (read_seqcount_retry(&xip_sparse_seq, count)) {
		mutex_lock(&xip_sparse_mutex);
		locked = 1;
		goto retry;
	}
}

/*
 * xip_fault() is invoked via the vma operations vector for a
 * mapped memory region to read in file data during a page fault.
 *
 * This function is derived from filemap_fault, but used for execute in place
 */
static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	pgoff_t size;
	void *xip_mem;
	unsigned long xip_pfn;
	struct page *page;
	int error;

	/* XXX: are VM_FAULT_ codes OK? */
again:
	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if (vmf->pgoff >= size)
		return VM_FAULT_SIGBUS;

	error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
						&xip_mem, &xip_pfn);
	if (likely(!error))
		goto found;
	if (error != -ENODATA)
		return VM_FAULT_OOM;

	/* sparse block */
	if ((vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
	    (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) &&
	    (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
		int err;

		/* maybe shared writable, allocate new block */
		mutex_lock(&xip_sparse_mutex);
		error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 1,
							&xip_mem, &xip_pfn);
		mutex_unlock(&xip_sparse_mutex);
		if (error)
			return VM_FAULT_SIGBUS;
		/* unmap sparse mappings at pgoff from all other vmas */
		__xip_unmap(mapping, vmf->pgoff);

found:
		/*
		 * We must recheck i_size under i_mmap_rwsem to prevent races
		 * with truncation
		 */
		i_mmap_lock_read(mapping);
		size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
							PAGE_CACHE_SHIFT;
		if (unlikely(vmf->pgoff >= size)) {
			i_mmap_unlock_read(mapping);
			return VM_FAULT_SIGBUS;
		}
		err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
							xip_pfn);
		i_mmap_unlock_read(mapping);
		if (err == -ENOMEM)
			return VM_FAULT_OOM;
		/*
		 * err == -EBUSY is fine, we've raced against another thread
		 * that faulted-in the same page
		 */
		if (err != -EBUSY)
			BUG_ON(err);
		return VM_FAULT_NOPAGE;
	} else {
		int err, ret = VM_FAULT_OOM;

		mutex_lock(&xip_sparse_mutex);
		write_seqcount_begin(&xip_sparse_seq);
		error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
							&xip_mem, &xip_pfn);
		if (unlikely(!error)) {
			write_seqcount_end(&xip_sparse_seq);
			mutex_unlock(&xip_sparse_mutex);
			goto again;
		}
		if (error != -ENODATA)
			goto out;

		/*
		 * We must recheck i_size under i_mmap_rwsem to prevent races
		 * with truncation
		 */
		i_mmap_lock_read(mapping);
		size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
							PAGE_CACHE_SHIFT;
		if (unlikely(vmf->pgoff >= size)) {
			ret = VM_FAULT_SIGBUS;
			goto unlock;
		}
		/* not shared and writable, use xip_sparse_page() */
		page = xip_sparse_page();
		if (!page)
			goto unlock;
		err = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
							page);
		if (err == -ENOMEM)
			goto unlock;

		ret = VM_FAULT_NOPAGE;
unlock:
		i_mmap_unlock_read(mapping);
out:
		write_seqcount_end(&xip_sparse_seq);
		mutex_unlock(&xip_sparse_mutex);

		return ret;
	}
}

static const struct vm_operations_struct xip_file_vm_ops = {
	.fault	= xip_file_fault,
	.page_mkwrite	= filemap_page_mkwrite,
};

int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
{
	BUG_ON(!file->f_mapping->a_ops->get_xip_mem);

	file_accessed(file);
	vma->vm_ops = &xip_file_vm_ops;
	vma->vm_flags |= VM_MIXEDMAP;
	return 0;
}
EXPORT_SYMBOL_GPL(xip_file_mmap);

/*
 * truncate a page used for execute in place
 * functionality is analog to block_truncate_page but does use get_xip_mem