Mirror of https://github.com/torvalds/linux.git, synced 2025-11-04 10:40:15 +02:00

	shmem: Implement splice-read
The new filemap_splice_read() has an implicit expectation via
filemap_get_pages() that ->read_folio() exists if ->readahead() doesn't
fully populate the pagecache of the file it is reading from[1], potentially
leading to a jump to NULL if this doesn't exist.  shmem, however (and by
extension, tmpfs, ramfs and rootfs), doesn't have ->read_folio().

Work around this by equipping shmem with its own splice-read
implementation, based on filemap_splice_read(), but able to paste in the
zero page when there's a page missing.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Daniel Golle <daniel@makrotopia.org>
cc: Guenter Roeck <groeck7@gmail.com>
cc: Christoph Hellwig <hch@lst.de>
cc: Jens Axboe <axboe@kernel.dk>
cc: Al Viro <viro@zeniv.linux.org.uk>
cc: John Hubbard <jhubbard@nvidia.com>
cc: David Hildenbrand <david@redhat.com>
cc: Matthew Wilcox <willy@infradead.org>
cc: Hugh Dickins <hughd@google.com>
cc: linux-block@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
Link: https://lore.kernel.org/r/Y+pdHFFTk1TTEBsO@makrotopia.org/ [1]
Link: https://lore.kernel.org/r/20230522135018.2742245-10-dhowells@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

This commit is contained in:

parent b85930a077
commit bd194b1871

1 changed file with 133 additions and 1 deletion

diff --git a/mm/shmem.c b/mm/shmem.c
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2731,6 +2731,138 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	return retval ? retval : error;
 }
 
+static bool zero_pipe_buf_get(struct pipe_inode_info *pipe,
+			      struct pipe_buffer *buf)
+{
+	return true;
+}
+
+static void zero_pipe_buf_release(struct pipe_inode_info *pipe,
+				  struct pipe_buffer *buf)
+{
+}
+
+static bool zero_pipe_buf_try_steal(struct pipe_inode_info *pipe,
+				    struct pipe_buffer *buf)
+{
+	return false;
+}
+
+static const struct pipe_buf_operations zero_pipe_buf_ops = {
+	.release	= zero_pipe_buf_release,
+	.try_steal	= zero_pipe_buf_try_steal,
+	.get		= zero_pipe_buf_get,
+};
+
+static size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe,
+					loff_t fpos, size_t size)
+{
+	size_t offset = fpos & ~PAGE_MASK;
+
+	size = min_t(size_t, size, PAGE_SIZE - offset);
+
+	if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
+		struct pipe_buffer *buf = pipe_head_buf(pipe);
+
+		*buf = (struct pipe_buffer) {
+			.ops	= &zero_pipe_buf_ops,
+			.page	= ZERO_PAGE(0),
+			.offset	= offset,
+			.len	= size,
+		};
+		pipe->head++;
+	}
+
+	return size;
+}
+
+static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
+				      struct pipe_inode_info *pipe,
+				      size_t len, unsigned int flags)
+{
+	struct inode *inode = file_inode(in);
+	struct address_space *mapping = inode->i_mapping;
+	struct folio *folio = NULL;
+	size_t total_spliced = 0, used, npages, n, part;
+	loff_t isize;
+	int error = 0;
+
+	/* Work out how much data we can actually add into the pipe */
+	used = pipe_occupancy(pipe->head, pipe->tail);
+	npages = max_t(ssize_t, pipe->max_usage - used, 0);
+	len = min_t(size_t, len, npages * PAGE_SIZE);
+
+	do {
+		if (*ppos >= i_size_read(inode))
+			break;
+
+		error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio, SGP_READ);
+		if (error) {
+			if (error == -EINVAL)
+				error = 0;
+			break;
+		}
+		if (folio) {
+			folio_unlock(folio);
+
+			if (folio_test_hwpoison(folio)) {
+				error = -EIO;
+				break;
+			}
+		}
+
+		/*
+		 * i_size must be checked after we know the pages are Uptodate.
+		 *
+		 * Checking i_size after the check allows us to calculate
+		 * the correct value for "nr", which means the zero-filled
+		 * part of the page is not copied back to userspace (unless
+		 * another truncate extends the file - this is desired though).
+		 */
+		isize = i_size_read(inode);
+		if (unlikely(*ppos >= isize))
+			break;
+		part = min_t(loff_t, isize - *ppos, len);
+
+		if (folio) {
+			/*
+			 * If users can be writing to this page using arbitrary
+			 * virtual addresses, take care about potential aliasing
+			 * before reading the page on the kernel side.
+			 */
+			if (mapping_writably_mapped(mapping))
+				flush_dcache_folio(folio);
+			folio_mark_accessed(folio);
+			/*
+			 * Ok, we have the page, and it's up-to-date, so we can
+			 * now splice it into the pipe.
+			 */
+			n = splice_folio_into_pipe(pipe, folio, *ppos, part);
+			folio_put(folio);
+			folio = NULL;
+		} else {
+			n = splice_zeropage_into_pipe(pipe, *ppos, len);
+		}
+
+		if (!n)
+			break;
+		len -= n;
+		total_spliced += n;
+		*ppos += n;
+		in->f_ra.prev_pos = *ppos;
+		if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+			break;
+
+		cond_resched();
+	} while (len);
+
+	if (folio)
+		folio_put(folio);
+
+	file_accessed(in);
+	return total_spliced ? total_spliced : error;
+}
+
 static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct address_space *mapping = file->f_mapping;
@@ -3971,7 +4103,7 @@ static const struct file_operations shmem_file_operations = {
 	.read_iter	= shmem_file_read_iter,
 	.write_iter	= generic_file_write_iter,
 	.fsync		= noop_fsync,
-	.splice_read	= generic_file_splice_read,
+	.splice_read	= shmem_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.fallocate	= shmem_fallocate,
 #endif
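
Not part of the patch: below is a minimal userspace sketch of one way to exercise the new path. memfd_create() returns a file on the kernel's internal shmem mount, so a file-to-pipe splice() from it goes through shmem_file_splice_read() on a kernel with this change, and a hole left by ftruncate() has no backing folio, which is the case where splice_zeropage_into_pipe() feeds ZERO_PAGE(0) into the pipe. The program name and layout are illustrative assumptions, not taken from the patch or any test suite.

/*
 * Hypothetical test, not from the patch: splice from a sparse
 * shmem-backed memfd into a pipe and read the result back.
 * Error handling is omitted for brevity.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long pg = sysconf(_SC_PAGESIZE);
	char buf[65536];
	int pfd[2];
	loff_t off = 0;
	ssize_t n;

	/* memfd_create() gives a file on the internal shmem/tmpfs mount. */
	int mfd = memfd_create("shmem-splice-test", 0);

	pipe(pfd);

	/* Page 0 gets real data; page 1 is a hole with no backing folio. */
	pwrite(mfd, "hello tmpfs\n", 12, 0);
	ftruncate(mfd, 2 * pg);

	/* File-to-pipe splice; with this patch it runs shmem_file_splice_read(). */
	n = splice(mfd, &off, pfd[1], NULL, 2 * pg, 0);
	printf("spliced %zd bytes\n", n);

	/* Bytes that came from the hole should read back from the pipe as zeroes. */
	n = read(pfd[0], buf, sizeof(buf));
	printf("read %zd bytes; byte at offset %ld is %d\n",
	       n, pg, n > pg ? buf[pg] : -1);
	return 0;
}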