mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	mm/filemap.c: generic_file_buffered_read() now uses find_get_pages_contig
Convert generic_file_buffered_read() to get pages to read from in batches, and then copy data to userspace from many pages at once - in particular, we now don't touch any cachelines that might be contended while we're in the loop to copy data to userspace. This is is a performance improvement on workloads that do buffered reads with large blocksizes, and a very large performance improvement if that file is also being accessed concurrently by different threads. On smaller reads (512 bytes), there's a very small performance improvement (1%, within the margin of error). akpm: kernel test robot found a 32% speedup on one test: https://lkml.kernel.org/r/20201030081456.GY31092@shao2-debian Link: https://lkml.kernel.org/r/20201025212949.602194-3-kent.overstreet@gmail.com Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: kernel test robot <rong.a.chen@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									723ef24b9b
								
							
						
					
					
						commit
						06c0444290
					
				
					 1 changed files with 177 additions and 140 deletions
				
			
		
							
								
								
									
										317
									
								
								mm/filemap.c
									
									
									
									
									
								
							
							
						
						
									
										317
									
								
								mm/filemap.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -2176,67 +2176,6 @@ static int lock_page_for_iocb(struct kiocb *iocb, struct page *page)
 | 
			
		|||
		return lock_page_killable(page);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int generic_file_buffered_read_page_ok(struct kiocb *iocb,
 | 
			
		||||
			struct iov_iter *iter,
 | 
			
		||||
			struct page *page)
 | 
			
		||||
{
 | 
			
		||||
	struct address_space *mapping = iocb->ki_filp->f_mapping;
 | 
			
		||||
	struct inode *inode = mapping->host;
 | 
			
		||||
	struct file_ra_state *ra = &iocb->ki_filp->f_ra;
 | 
			
		||||
	unsigned int offset = iocb->ki_pos & ~PAGE_MASK;
 | 
			
		||||
	unsigned int bytes, copied;
 | 
			
		||||
	loff_t isize, end_offset;
 | 
			
		||||
 | 
			
		||||
	BUG_ON(iocb->ki_pos >> PAGE_SHIFT != page->index);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * i_size must be checked after we know the page is Uptodate.
 | 
			
		||||
	 *
 | 
			
		||||
	 * Checking i_size after the check allows us to calculate
 | 
			
		||||
	 * the correct value for "bytes", which means the zero-filled
 | 
			
		||||
	 * part of the page is not copied back to userspace (unless
 | 
			
		||||
	 * another truncate extends the file - this is desired though).
 | 
			
		||||
	 */
 | 
			
		||||
 | 
			
		||||
	isize = i_size_read(inode);
 | 
			
		||||
	if (unlikely(iocb->ki_pos >= isize))
 | 
			
		||||
		return 1;
 | 
			
		||||
 | 
			
		||||
	end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
 | 
			
		||||
 | 
			
		||||
	bytes = min_t(loff_t, end_offset - iocb->ki_pos, PAGE_SIZE - offset);
 | 
			
		||||
 | 
			
		||||
	/* If users can be writing to this page using arbitrary
 | 
			
		||||
	 * virtual addresses, take care about potential aliasing
 | 
			
		||||
	 * before reading the page on the kernel side.
 | 
			
		||||
	 */
 | 
			
		||||
	if (mapping_writably_mapped(mapping))
 | 
			
		||||
		flush_dcache_page(page);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Ok, we have the page, and it's up-to-date, so
 | 
			
		||||
	 * now we can copy it to user space...
 | 
			
		||||
	 */
 | 
			
		||||
 | 
			
		||||
	copied = copy_page_to_iter(page, offset, bytes, iter);
 | 
			
		||||
 | 
			
		||||
	iocb->ki_pos += copied;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * When a sequential read accesses a page several times,
 | 
			
		||||
	 * only mark it as accessed the first time.
 | 
			
		||||
	 */
 | 
			
		||||
	if (iocb->ki_pos >> PAGE_SHIFT != ra->prev_pos >> PAGE_SHIFT)
 | 
			
		||||
		mark_page_accessed(page);
 | 
			
		||||
 | 
			
		||||
	ra->prev_pos = iocb->ki_pos;
 | 
			
		||||
 | 
			
		||||
	if (copied < bytes)
 | 
			
		||||
		return -EFAULT;
 | 
			
		||||
 | 
			
		||||
	return !iov_iter_count(iter) || iocb->ki_pos == isize;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct page *
 | 
			
		||||
generic_file_buffered_read_readpage(struct kiocb *iocb,
 | 
			
		||||
				    struct file *filp,
 | 
			
		||||
| 
						 | 
				
			
			@ -2394,6 +2333,92 @@ generic_file_buffered_read_no_cached_page(struct kiocb *iocb,
 | 
			
		|||
	return generic_file_buffered_read_readpage(iocb, filp, mapping, page);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int generic_file_buffered_read_get_pages(struct kiocb *iocb,
 | 
			
		||||
						struct iov_iter *iter,
 | 
			
		||||
						struct page **pages,
 | 
			
		||||
						unsigned int nr)
 | 
			
		||||
{
 | 
			
		||||
	struct file *filp = iocb->ki_filp;
 | 
			
		||||
	struct address_space *mapping = filp->f_mapping;
 | 
			
		||||
	struct file_ra_state *ra = &filp->f_ra;
 | 
			
		||||
	pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
 | 
			
		||||
	pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
 | 
			
		||||
	int i, j, nr_got, err = 0;
 | 
			
		||||
 | 
			
		||||
	nr = min_t(unsigned long, last_index - index, nr);
 | 
			
		||||
find_page:
 | 
			
		||||
	if (fatal_signal_pending(current))
 | 
			
		||||
		return -EINTR;
 | 
			
		||||
 | 
			
		||||
	nr_got = find_get_pages_contig(mapping, index, nr, pages);
 | 
			
		||||
	if (nr_got)
 | 
			
		||||
		goto got_pages;
 | 
			
		||||
 | 
			
		||||
	if (iocb->ki_flags & IOCB_NOIO)
 | 
			
		||||
		return -EAGAIN;
 | 
			
		||||
 | 
			
		||||
	page_cache_sync_readahead(mapping, ra, filp, index, last_index - index);
 | 
			
		||||
 | 
			
		||||
	nr_got = find_get_pages_contig(mapping, index, nr, pages);
 | 
			
		||||
	if (nr_got)
 | 
			
		||||
		goto got_pages;
 | 
			
		||||
 | 
			
		||||
	pages[0] = generic_file_buffered_read_no_cached_page(iocb, iter);
 | 
			
		||||
	err = PTR_ERR_OR_ZERO(pages[0]);
 | 
			
		||||
	if (!IS_ERR_OR_NULL(pages[0]))
 | 
			
		||||
		nr_got = 1;
 | 
			
		||||
got_pages:
 | 
			
		||||
	for (i = 0; i < nr_got; i++) {
 | 
			
		||||
		struct page *page = pages[i];
 | 
			
		||||
		pgoff_t pg_index = index + i;
 | 
			
		||||
		loff_t pg_pos = max(iocb->ki_pos,
 | 
			
		||||
				    (loff_t) pg_index << PAGE_SHIFT);
 | 
			
		||||
		loff_t pg_count = iocb->ki_pos + iter->count - pg_pos;
 | 
			
		||||
 | 
			
		||||
		if (PageReadahead(page)) {
 | 
			
		||||
			if (iocb->ki_flags & IOCB_NOIO) {
 | 
			
		||||
				for (j = i; j < nr_got; j++)
 | 
			
		||||
					put_page(pages[j]);
 | 
			
		||||
				nr_got = i;
 | 
			
		||||
				err = -EAGAIN;
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
			page_cache_async_readahead(mapping, ra, filp, page,
 | 
			
		||||
					pg_index, last_index - pg_index);
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (!PageUptodate(page)) {
 | 
			
		||||
			if ((iocb->ki_flags & IOCB_NOWAIT) ||
 | 
			
		||||
			    ((iocb->ki_flags & IOCB_WAITQ) && i)) {
 | 
			
		||||
				for (j = i; j < nr_got; j++)
 | 
			
		||||
					put_page(pages[j]);
 | 
			
		||||
				nr_got = i;
 | 
			
		||||
				err = -EAGAIN;
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			page = generic_file_buffered_read_pagenotuptodate(iocb,
 | 
			
		||||
					filp, iter, page, pg_pos, pg_count);
 | 
			
		||||
			if (IS_ERR_OR_NULL(page)) {
 | 
			
		||||
				for (j = i + 1; j < nr_got; j++)
 | 
			
		||||
					put_page(pages[j]);
 | 
			
		||||
				nr_got = i;
 | 
			
		||||
				err = PTR_ERR_OR_ZERO(page);
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (likely(nr_got))
 | 
			
		||||
		return nr_got;
 | 
			
		||||
	if (err)
 | 
			
		||||
		return err;
 | 
			
		||||
	/*
 | 
			
		||||
	 * No pages and no error means we raced and should retry:
 | 
			
		||||
	 */
 | 
			
		||||
	goto find_page;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * generic_file_buffered_read - generic file read routine
 | 
			
		||||
 * @iocb:	the iocb to read
 | 
			
		||||
| 
						 | 
				
			
			@ -2414,104 +2439,116 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 | 
			
		|||
		struct iov_iter *iter, ssize_t written)
 | 
			
		||||
{
 | 
			
		||||
	struct file *filp = iocb->ki_filp;
 | 
			
		||||
	struct file_ra_state *ra = &filp->f_ra;
 | 
			
		||||
	struct address_space *mapping = filp->f_mapping;
 | 
			
		||||
	struct inode *inode = mapping->host;
 | 
			
		||||
	struct file_ra_state *ra = &filp->f_ra;
 | 
			
		||||
	size_t orig_count = iov_iter_count(iter);
 | 
			
		||||
	pgoff_t last_index;
 | 
			
		||||
	int error = 0;
 | 
			
		||||
	struct page *pages_onstack[PAGEVEC_SIZE], **pages = NULL;
 | 
			
		||||
	unsigned int nr_pages = min_t(unsigned int, 512,
 | 
			
		||||
			((iocb->ki_pos + iter->count + PAGE_SIZE - 1) >> PAGE_SHIFT) -
 | 
			
		||||
			(iocb->ki_pos >> PAGE_SHIFT));
 | 
			
		||||
	int i, pg_nr, error = 0;
 | 
			
		||||
	bool writably_mapped;
 | 
			
		||||
	loff_t isize, end_offset;
 | 
			
		||||
 | 
			
		||||
	if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes))
 | 
			
		||||
		return 0;
 | 
			
		||||
	iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
 | 
			
		||||
 | 
			
		||||
	last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
 | 
			
		||||
	if (nr_pages > ARRAY_SIZE(pages_onstack))
 | 
			
		||||
		pages = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * If we've already successfully copied some data, then we
 | 
			
		||||
	 * can no longer safely return -EIOCBQUEUED. Hence mark
 | 
			
		||||
	 * an async read NOWAIT at that point.
 | 
			
		||||
	 */
 | 
			
		||||
	if (written && (iocb->ki_flags & IOCB_WAITQ))
 | 
			
		||||
		iocb->ki_flags |= IOCB_NOWAIT;
 | 
			
		||||
 | 
			
		||||
	for (;;) {
 | 
			
		||||
		pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
 | 
			
		||||
		struct page *page;
 | 
			
		||||
	if (!pages) {
 | 
			
		||||
		pages = pages_onstack;
 | 
			
		||||
		nr_pages = min_t(unsigned int, nr_pages, ARRAY_SIZE(pages_onstack));
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	do {
 | 
			
		||||
		cond_resched();
 | 
			
		||||
find_page:
 | 
			
		||||
		if (fatal_signal_pending(current)) {
 | 
			
		||||
			error = -EINTR;
 | 
			
		||||
			goto out;
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * If we've already successfully copied some data, then we
 | 
			
		||||
		 * can no longer safely return -EIOCBQUEUED. Hence mark
 | 
			
		||||
		 * an async read NOWAIT at that point.
 | 
			
		||||
		 */
 | 
			
		||||
		if ((iocb->ki_flags & IOCB_WAITQ) && written)
 | 
			
		||||
			iocb->ki_flags |= IOCB_NOWAIT;
 | 
			
		||||
 | 
			
		||||
		i = 0;
 | 
			
		||||
		pg_nr = generic_file_buffered_read_get_pages(iocb, iter,
 | 
			
		||||
							     pages, nr_pages);
 | 
			
		||||
		if (pg_nr < 0) {
 | 
			
		||||
			error = pg_nr;
 | 
			
		||||
			break;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * We can't return -EIOCBQUEUED once we've done some work, so
 | 
			
		||||
		 * ensure we don't block:
 | 
			
		||||
		 * i_size must be checked after we know the pages are Uptodate.
 | 
			
		||||
		 *
 | 
			
		||||
		 * Checking i_size after the check allows us to calculate
 | 
			
		||||
		 * the correct value for "nr", which means the zero-filled
 | 
			
		||||
		 * part of the page is not copied back to userspace (unless
 | 
			
		||||
		 * another truncate extends the file - this is desired though).
 | 
			
		||||
		 */
 | 
			
		||||
		if ((iocb->ki_flags & IOCB_WAITQ) &&
 | 
			
		||||
		    (written + orig_count - iov_iter_count(iter)))
 | 
			
		||||
			iocb->ki_flags |= IOCB_NOWAIT;
 | 
			
		||||
		isize = i_size_read(inode);
 | 
			
		||||
		if (unlikely(iocb->ki_pos >= isize))
 | 
			
		||||
			goto put_pages;
 | 
			
		||||
 | 
			
		||||
		page = find_get_page(mapping, index);
 | 
			
		||||
		if (!page) {
 | 
			
		||||
			if (iocb->ki_flags & IOCB_NOIO)
 | 
			
		||||
				goto would_block;
 | 
			
		||||
			page_cache_sync_readahead(mapping,
 | 
			
		||||
					ra, filp,
 | 
			
		||||
					index, last_index - index);
 | 
			
		||||
			page = find_get_page(mapping, index);
 | 
			
		||||
			if (unlikely(page == NULL)) {
 | 
			
		||||
				page = generic_file_buffered_read_no_cached_page(iocb, iter);
 | 
			
		||||
				if (!page)
 | 
			
		||||
					goto find_page;
 | 
			
		||||
				if (IS_ERR(page)) {
 | 
			
		||||
					error = PTR_ERR(page);
 | 
			
		||||
					goto out;
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if (PageReadahead(page)) {
 | 
			
		||||
			if (iocb->ki_flags & IOCB_NOIO) {
 | 
			
		||||
				put_page(page);
 | 
			
		||||
				goto out;
 | 
			
		||||
			}
 | 
			
		||||
			page_cache_async_readahead(mapping,
 | 
			
		||||
					ra, filp, page,
 | 
			
		||||
					index, last_index - index);
 | 
			
		||||
		}
 | 
			
		||||
		if (!PageUptodate(page)) {
 | 
			
		||||
			if (iocb->ki_flags & IOCB_NOWAIT) {
 | 
			
		||||
				put_page(page);
 | 
			
		||||
				error = -EAGAIN;
 | 
			
		||||
				goto out;
 | 
			
		||||
			}
 | 
			
		||||
			page = generic_file_buffered_read_pagenotuptodate(iocb,
 | 
			
		||||
					filp, iter, page, iocb->ki_pos, iter->count);
 | 
			
		||||
			if (!page)
 | 
			
		||||
				goto find_page;
 | 
			
		||||
			if (IS_ERR(page)) {
 | 
			
		||||
				error = PTR_ERR(page);
 | 
			
		||||
				goto out;
 | 
			
		||||
		end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
 | 
			
		||||
 | 
			
		||||
		while ((iocb->ki_pos >> PAGE_SHIFT) + pg_nr >
 | 
			
		||||
		       (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT)
 | 
			
		||||
			put_page(pages[--pg_nr]);
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * Once we start copying data, we don't want to be touching any
 | 
			
		||||
		 * cachelines that might be contended:
 | 
			
		||||
		 */
 | 
			
		||||
		writably_mapped = mapping_writably_mapped(mapping);
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * When a sequential read accesses a page several times, only
 | 
			
		||||
		 * mark it as accessed the first time.
 | 
			
		||||
		 */
 | 
			
		||||
		if (iocb->ki_pos >> PAGE_SHIFT !=
 | 
			
		||||
		    ra->prev_pos >> PAGE_SHIFT)
 | 
			
		||||
			mark_page_accessed(pages[0]);
 | 
			
		||||
		for (i = 1; i < pg_nr; i++)
 | 
			
		||||
			mark_page_accessed(pages[i]);
 | 
			
		||||
 | 
			
		||||
		for (i = 0; i < pg_nr; i++) {
 | 
			
		||||
			unsigned int offset = iocb->ki_pos & ~PAGE_MASK;
 | 
			
		||||
			unsigned int bytes = min_t(loff_t, end_offset - iocb->ki_pos,
 | 
			
		||||
						   PAGE_SIZE - offset);
 | 
			
		||||
			unsigned int copied;
 | 
			
		||||
 | 
			
		||||
			/*
 | 
			
		||||
			 * If users can be writing to this page using arbitrary
 | 
			
		||||
			 * virtual addresses, take care about potential aliasing
 | 
			
		||||
			 * before reading the page on the kernel side.
 | 
			
		||||
			 */
 | 
			
		||||
			if (writably_mapped)
 | 
			
		||||
				flush_dcache_page(pages[i]);
 | 
			
		||||
 | 
			
		||||
			copied = copy_page_to_iter(pages[i], offset, bytes, iter);
 | 
			
		||||
 | 
			
		||||
			written += copied;
 | 
			
		||||
			iocb->ki_pos += copied;
 | 
			
		||||
			ra->prev_pos = iocb->ki_pos;
 | 
			
		||||
 | 
			
		||||
			if (copied < bytes) {
 | 
			
		||||
				error = -EFAULT;
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
put_pages:
 | 
			
		||||
		for (i = 0; i < pg_nr; i++)
 | 
			
		||||
			put_page(pages[i]);
 | 
			
		||||
	} while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);
 | 
			
		||||
 | 
			
		||||
		error = generic_file_buffered_read_page_ok(iocb, iter, page);
 | 
			
		||||
		put_page(page);
 | 
			
		||||
 | 
			
		||||
		if (error) {
 | 
			
		||||
			if (error > 0)
 | 
			
		||||
				error = 0;
 | 
			
		||||
			goto out;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
would_block:
 | 
			
		||||
	error = -EAGAIN;
 | 
			
		||||
out:
 | 
			
		||||
	file_accessed(filp);
 | 
			
		||||
	written += orig_count - iov_iter_count(iter);
 | 
			
		||||
 | 
			
		||||
	if (pages != pages_onstack)
 | 
			
		||||
		kfree(pages);
 | 
			
		||||
 | 
			
		||||
	return written ? written : error;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue