forked from mirrors/linux
		
	new iov_iter flavour: pipe-backed
iov_iter variant for passing data into a pipe. copy_to_iter() copies data into page(s) it has allocated and stuffs them into the pipe; copy_page_to_iter() instead stuffs a reference to the page given to it into the pipe. Both will try to coalesce if possible. iov_iter_zero() is similar to copy_to_iter(); iov_iter_get_pages() and friends will do as copy_to_iter() would have and return the pages where the data would've been copied. iov_iter_advance() will truncate everything past the spot it has advanced to.

New primitive: iov_iter_pipe(), used for initializing those. The pipe should be locked all along.

Running out of space acts as a fault would for iovec-backed ones; in other words, giving it to ->read_iter() may result in a short read if the pipe overflows, or -EFAULT if it happens with nothing copied there. That is, ->read_iter() on those acts pretty much like ->splice_read(). Moreover, all generic_file_splice_read() users, as well as many other ->splice_read() instances, can be switched to that scheme - that'll happen in the next commit.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
		
							parent
							
								
									d82718e348
								
							
						
					
					
						commit
						241699cd72
					
				
					 4 changed files with 408 additions and 6 deletions
				
			
		|  | @ -524,7 +524,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(generic_file_splice_read); | EXPORT_SYMBOL(generic_file_splice_read); | ||||||
| 
 | 
 | ||||||
| static const struct pipe_buf_operations default_pipe_buf_ops = { | const struct pipe_buf_operations default_pipe_buf_ops = { | ||||||
| 	.can_merge = 0, | 	.can_merge = 0, | ||||||
| 	.confirm = generic_pipe_buf_confirm, | 	.confirm = generic_pipe_buf_confirm, | ||||||
| 	.release = generic_pipe_buf_release, | 	.release = generic_pipe_buf_release, | ||||||
|  |  | ||||||
|  | @ -85,4 +85,5 @@ extern void splice_shrink_spd(struct splice_pipe_desc *); | ||||||
| extern void spd_release_page(struct splice_pipe_desc *, unsigned int); | extern void spd_release_page(struct splice_pipe_desc *, unsigned int); | ||||||
| 
 | 
 | ||||||
| extern const struct pipe_buf_operations page_cache_pipe_buf_ops; | extern const struct pipe_buf_operations page_cache_pipe_buf_ops; | ||||||
|  | extern const struct pipe_buf_operations default_pipe_buf_ops; | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  | @ -13,6 +13,7 @@ | ||||||
| #include <uapi/linux/uio.h> | #include <uapi/linux/uio.h> | ||||||
| 
 | 
 | ||||||
| struct page; | struct page; | ||||||
|  | struct pipe_inode_info; | ||||||
| 
 | 
 | ||||||
| struct kvec { | struct kvec { | ||||||
| 	void *iov_base; /* and that should *never* hold a userland pointer */ | 	void *iov_base; /* and that should *never* hold a userland pointer */ | ||||||
|  | @ -23,6 +24,7 @@ enum { | ||||||
| 	ITER_IOVEC = 0, | 	ITER_IOVEC = 0, | ||||||
| 	ITER_KVEC = 2, | 	ITER_KVEC = 2, | ||||||
| 	ITER_BVEC = 4, | 	ITER_BVEC = 4, | ||||||
|  | 	ITER_PIPE = 8, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct iov_iter { | struct iov_iter { | ||||||
|  | @ -33,8 +35,12 @@ struct iov_iter { | ||||||
| 		const struct iovec *iov; | 		const struct iovec *iov; | ||||||
| 		const struct kvec *kvec; | 		const struct kvec *kvec; | ||||||
| 		const struct bio_vec *bvec; | 		const struct bio_vec *bvec; | ||||||
|  | 		struct pipe_inode_info *pipe; | ||||||
| 	}; | 	}; | ||||||
|  | 	union { | ||||||
| 		unsigned long nr_segs; | 		unsigned long nr_segs; | ||||||
|  | 		int idx; | ||||||
|  | 	}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  | @ -64,7 +70,7 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define iov_for_each(iov, iter, start)				\ | #define iov_for_each(iov, iter, start)				\ | ||||||
| 	if (!((start).type & ITER_BVEC))			\ | 	if (!((start).type & (ITER_BVEC | ITER_PIPE)))		\ | ||||||
| 	for (iter = (start);					\ | 	for (iter = (start);					\ | ||||||
| 	     (iter).count &&					\ | 	     (iter).count &&					\ | ||||||
| 	     ((iov = iov_iter_iovec(&(iter))), 1);		\ | 	     ((iov = iov_iter_iovec(&(iter))), 1);		\ | ||||||
|  | @ -94,6 +100,8 @@ void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec, | ||||||
| 			unsigned long nr_segs, size_t count); | 			unsigned long nr_segs, size_t count); | ||||||
| void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec, | void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec, | ||||||
| 			unsigned long nr_segs, size_t count); | 			unsigned long nr_segs, size_t count); | ||||||
|  | void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe, | ||||||
|  | 			size_t count); | ||||||
| ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, | ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, | ||||||
| 			size_t maxsize, unsigned maxpages, size_t *start); | 			size_t maxsize, unsigned maxpages, size_t *start); | ||||||
| ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, | ||||||
|  | @ -109,7 +117,7 @@ static inline size_t iov_iter_count(struct iov_iter *i) | ||||||
| 
 | 
 | ||||||
| static inline bool iter_is_iovec(struct iov_iter *i) | static inline bool iter_is_iovec(struct iov_iter *i) | ||||||
| { | { | ||||||
| 	return !(i->type & (ITER_BVEC | ITER_KVEC)); | 	return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  |  | ||||||
							
								
								
									
										397
									
								
								lib/iov_iter.c
									
									
									
									
									
								
							
							
						
						
									
										397
									
								
								lib/iov_iter.c
									
									
									
									
									
								
							|  | @ -3,8 +3,11 @@ | ||||||
| #include <linux/pagemap.h> | #include <linux/pagemap.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| #include <linux/vmalloc.h> | #include <linux/vmalloc.h> | ||||||
|  | #include <linux/splice.h> | ||||||
| #include <net/checksum.h> | #include <net/checksum.h> | ||||||
| 
 | 
 | ||||||
|  | #define PIPE_PARANOIA /* for now */ | ||||||
|  | 
 | ||||||
| #define iterate_iovec(i, n, __v, __p, skip, STEP) {	\ | #define iterate_iovec(i, n, __v, __p, skip, STEP) {	\ | ||||||
| 	size_t left;					\ | 	size_t left;					\ | ||||||
| 	size_t wanted = n;				\ | 	size_t wanted = n;				\ | ||||||
|  | @ -290,6 +293,93 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t | ||||||
| 	return wanted - bytes; | 	return wanted - bytes; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
#ifdef PIPE_PARANOIA
/*
 * Check that a pipe-backed iterator still agrees with the state of its pipe.
 *
 * Invariants verified:
 *  - if i->iov_offset is non-zero, the pipe is non-empty, i->idx names the
 *    last occupied slot and i->iov_offset sits exactly at the end of the
 *    data in that slot (offset + len);
 *  - if i->iov_offset is zero, i->idx names the slot right after the last
 *    occupied one.
 *
 * Returns true when the invariants hold.  Otherwise dumps the iterator
 * position and every pipe slot via printk, triggers WARN_ON() and returns
 * false.
 */
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	int idx = i->idx;
	int next = pipe->curbuf + pipe->nrbufs;
	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(!pipe->nrbufs))
			goto Bad;	/* pipe must be non-empty */
		if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
			goto Bad;	/* must be at the last buffer... */

		p = &pipe->bufs[idx];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	/* ... at the end of segment */
	} else {
		if (idx != (next & (pipe->buffers - 1)))
			goto Bad;	/* must be right after the last buffer */
	}
	return true;
Bad:
	/* iov_offset is a size_t, hence %zu rather than %zd */
	printk(KERN_ERR "idx = %d, offset = %zu\n", i->idx, i->iov_offset);
	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
			pipe->curbuf, pipe->nrbufs, pipe->buffers);
	for (idx = 0; idx < pipe->buffers; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
/* Paranoia disabled: every iterator is considered consistent. */
#define sanity(i) true
#endif
|  | 
 | ||||||
|  | static inline int next_idx(int idx, struct pipe_inode_info *pipe) | ||||||
|  | { | ||||||
|  | 	return (idx + 1) & (pipe->buffers - 1); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, | ||||||
|  | 			 struct iov_iter *i) | ||||||
|  | { | ||||||
|  | 	struct pipe_inode_info *pipe = i->pipe; | ||||||
|  | 	struct pipe_buffer *buf; | ||||||
|  | 	size_t off; | ||||||
|  | 	int idx; | ||||||
|  | 
 | ||||||
|  | 	if (unlikely(bytes > i->count)) | ||||||
|  | 		bytes = i->count; | ||||||
|  | 
 | ||||||
|  | 	if (unlikely(!bytes)) | ||||||
|  | 		return 0; | ||||||
|  | 
 | ||||||
|  | 	if (!sanity(i)) | ||||||
|  | 		return 0; | ||||||
|  | 
 | ||||||
|  | 	off = i->iov_offset; | ||||||
|  | 	idx = i->idx; | ||||||
|  | 	buf = &pipe->bufs[idx]; | ||||||
|  | 	if (off) { | ||||||
|  | 		if (offset == off && buf->page == page) { | ||||||
|  | 			/* merge with the last one */ | ||||||
|  | 			buf->len += bytes; | ||||||
|  | 			i->iov_offset += bytes; | ||||||
|  | 			goto out; | ||||||
|  | 		} | ||||||
|  | 		idx = next_idx(idx, pipe); | ||||||
|  | 		buf = &pipe->bufs[idx]; | ||||||
|  | 	} | ||||||
|  | 	if (idx == pipe->curbuf && pipe->nrbufs) | ||||||
|  | 		return 0; | ||||||
|  | 	pipe->nrbufs++; | ||||||
|  | 	buf->ops = &page_cache_pipe_buf_ops; | ||||||
|  | 	get_page(buf->page = page); | ||||||
|  | 	buf->offset = offset; | ||||||
|  | 	buf->len = bytes; | ||||||
|  | 	i->iov_offset = offset + bytes; | ||||||
|  | 	i->idx = idx; | ||||||
|  | out: | ||||||
|  | 	i->count -= bytes; | ||||||
|  | 	return bytes; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Fault in one or more iovecs of the given iov_iter, to a maximum length of |  * Fault in one or more iovecs of the given iov_iter, to a maximum length of | ||||||
|  * bytes.  For each iovec, fault in each page that constitutes the iovec. |  * bytes.  For each iovec, fault in each page that constitutes the iovec. | ||||||
|  | @ -356,9 +446,98 @@ static void memzero_page(struct page *page, size_t offset, size_t len) | ||||||
| 	kunmap_atomic(addr); | 	kunmap_atomic(addr); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static inline bool allocated(struct pipe_buffer *buf) | ||||||
|  | { | ||||||
|  | 	return buf->ops == &default_pipe_buf_ops; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp) | ||||||
|  | { | ||||||
|  | 	size_t off = i->iov_offset; | ||||||
|  | 	int idx = i->idx; | ||||||
|  | 	if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) { | ||||||
|  | 		idx = next_idx(idx, i->pipe); | ||||||
|  | 		off = 0; | ||||||
|  | 	} | ||||||
|  | 	*idxp = idx; | ||||||
|  | 	*offp = off; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static size_t push_pipe(struct iov_iter *i, size_t size, | ||||||
|  | 			int *idxp, size_t *offp) | ||||||
|  | { | ||||||
|  | 	struct pipe_inode_info *pipe = i->pipe; | ||||||
|  | 	size_t off; | ||||||
|  | 	int idx; | ||||||
|  | 	ssize_t left; | ||||||
|  | 
 | ||||||
|  | 	if (unlikely(size > i->count)) | ||||||
|  | 		size = i->count; | ||||||
|  | 	if (unlikely(!size)) | ||||||
|  | 		return 0; | ||||||
|  | 
 | ||||||
|  | 	left = size; | ||||||
|  | 	data_start(i, &idx, &off); | ||||||
|  | 	*idxp = idx; | ||||||
|  | 	*offp = off; | ||||||
|  | 	if (off) { | ||||||
|  | 		left -= PAGE_SIZE - off; | ||||||
|  | 		if (left <= 0) { | ||||||
|  | 			pipe->bufs[idx].len += size; | ||||||
|  | 			return size; | ||||||
|  | 		} | ||||||
|  | 		pipe->bufs[idx].len = PAGE_SIZE; | ||||||
|  | 		idx = next_idx(idx, pipe); | ||||||
|  | 	} | ||||||
|  | 	while (idx != pipe->curbuf || !pipe->nrbufs) { | ||||||
|  | 		struct page *page = alloc_page(GFP_USER); | ||||||
|  | 		if (!page) | ||||||
|  | 			break; | ||||||
|  | 		pipe->nrbufs++; | ||||||
|  | 		pipe->bufs[idx].ops = &default_pipe_buf_ops; | ||||||
|  | 		pipe->bufs[idx].page = page; | ||||||
|  | 		pipe->bufs[idx].offset = 0; | ||||||
|  | 		if (left <= PAGE_SIZE) { | ||||||
|  | 			pipe->bufs[idx].len = left; | ||||||
|  | 			return size; | ||||||
|  | 		} | ||||||
|  | 		pipe->bufs[idx].len = PAGE_SIZE; | ||||||
|  | 		left -= PAGE_SIZE; | ||||||
|  | 		idx = next_idx(idx, pipe); | ||||||
|  | 	} | ||||||
|  | 	return size - left; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static size_t copy_pipe_to_iter(const void *addr, size_t bytes, | ||||||
|  | 				struct iov_iter *i) | ||||||
|  | { | ||||||
|  | 	struct pipe_inode_info *pipe = i->pipe; | ||||||
|  | 	size_t n, off; | ||||||
|  | 	int idx; | ||||||
|  | 
 | ||||||
|  | 	if (!sanity(i)) | ||||||
|  | 		return 0; | ||||||
|  | 
 | ||||||
|  | 	bytes = n = push_pipe(i, bytes, &idx, &off); | ||||||
|  | 	if (unlikely(!n)) | ||||||
|  | 		return 0; | ||||||
|  | 	for ( ; n; idx = next_idx(idx, pipe), off = 0) { | ||||||
|  | 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off); | ||||||
|  | 		memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk); | ||||||
|  | 		i->idx = idx; | ||||||
|  | 		i->iov_offset = off + chunk; | ||||||
|  | 		n -= chunk; | ||||||
|  | 		addr += chunk; | ||||||
|  | 	} | ||||||
|  | 	i->count -= bytes; | ||||||
|  | 	return bytes; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) | size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) | ||||||
| { | { | ||||||
| 	const char *from = addr; | 	const char *from = addr; | ||||||
|  | 	if (unlikely(i->type & ITER_PIPE)) | ||||||
|  | 		return copy_pipe_to_iter(addr, bytes, i); | ||||||
| 	iterate_and_advance(i, bytes, v, | 	iterate_and_advance(i, bytes, v, | ||||||
| 		__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, | 		__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, | ||||||
| 			       v.iov_len), | 			       v.iov_len), | ||||||
|  | @ -374,6 +553,10 @@ EXPORT_SYMBOL(copy_to_iter); | ||||||
| size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) | size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) | ||||||
| { | { | ||||||
| 	char *to = addr; | 	char *to = addr; | ||||||
|  | 	if (unlikely(i->type & ITER_PIPE)) { | ||||||
|  | 		WARN_ON(1); | ||||||
|  | 		return 0; | ||||||
|  | 	} | ||||||
| 	iterate_and_advance(i, bytes, v, | 	iterate_and_advance(i, bytes, v, | ||||||
| 		__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, | 		__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, | ||||||
| 				 v.iov_len), | 				 v.iov_len), | ||||||
|  | @ -389,6 +572,10 @@ EXPORT_SYMBOL(copy_from_iter); | ||||||
| size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) | size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) | ||||||
| { | { | ||||||
| 	char *to = addr; | 	char *to = addr; | ||||||
|  | 	if (unlikely(i->type & ITER_PIPE)) { | ||||||
|  | 		WARN_ON(1); | ||||||
|  | 		return 0; | ||||||
|  | 	} | ||||||
| 	iterate_and_advance(i, bytes, v, | 	iterate_and_advance(i, bytes, v, | ||||||
| 		__copy_from_user_nocache((to += v.iov_len) - v.iov_len, | 		__copy_from_user_nocache((to += v.iov_len) - v.iov_len, | ||||||
| 					 v.iov_base, v.iov_len), | 					 v.iov_base, v.iov_len), | ||||||
|  | @ -409,14 +596,20 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, | ||||||
| 		size_t wanted = copy_to_iter(kaddr + offset, bytes, i); | 		size_t wanted = copy_to_iter(kaddr + offset, bytes, i); | ||||||
| 		kunmap_atomic(kaddr); | 		kunmap_atomic(kaddr); | ||||||
| 		return wanted; | 		return wanted; | ||||||
| 	} else | 	} else if (likely(!(i->type & ITER_PIPE))) | ||||||
| 		return copy_page_to_iter_iovec(page, offset, bytes, i); | 		return copy_page_to_iter_iovec(page, offset, bytes, i); | ||||||
|  | 	else | ||||||
|  | 		return copy_page_to_iter_pipe(page, offset, bytes, i); | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(copy_page_to_iter); | EXPORT_SYMBOL(copy_page_to_iter); | ||||||
| 
 | 
 | ||||||
| size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, | size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, | ||||||
| 			 struct iov_iter *i) | 			 struct iov_iter *i) | ||||||
| { | { | ||||||
|  | 	if (unlikely(i->type & ITER_PIPE)) { | ||||||
|  | 		WARN_ON(1); | ||||||
|  | 		return 0; | ||||||
|  | 	} | ||||||
| 	if (i->type & (ITER_BVEC|ITER_KVEC)) { | 	if (i->type & (ITER_BVEC|ITER_KVEC)) { | ||||||
| 		void *kaddr = kmap_atomic(page); | 		void *kaddr = kmap_atomic(page); | ||||||
| 		size_t wanted = copy_from_iter(kaddr + offset, bytes, i); | 		size_t wanted = copy_from_iter(kaddr + offset, bytes, i); | ||||||
|  | @ -427,8 +620,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(copy_page_from_iter); | EXPORT_SYMBOL(copy_page_from_iter); | ||||||
| 
 | 
 | ||||||
|  | static size_t pipe_zero(size_t bytes, struct iov_iter *i) | ||||||
|  | { | ||||||
|  | 	struct pipe_inode_info *pipe = i->pipe; | ||||||
|  | 	size_t n, off; | ||||||
|  | 	int idx; | ||||||
|  | 
 | ||||||
|  | 	if (!sanity(i)) | ||||||
|  | 		return 0; | ||||||
|  | 
 | ||||||
|  | 	bytes = n = push_pipe(i, bytes, &idx, &off); | ||||||
|  | 	if (unlikely(!n)) | ||||||
|  | 		return 0; | ||||||
|  | 
 | ||||||
|  | 	for ( ; n; idx = next_idx(idx, pipe), off = 0) { | ||||||
|  | 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off); | ||||||
|  | 		memzero_page(pipe->bufs[idx].page, off, chunk); | ||||||
|  | 		i->idx = idx; | ||||||
|  | 		i->iov_offset = off + chunk; | ||||||
|  | 		n -= chunk; | ||||||
|  | 	} | ||||||
|  | 	i->count -= bytes; | ||||||
|  | 	return bytes; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| size_t iov_iter_zero(size_t bytes, struct iov_iter *i) | size_t iov_iter_zero(size_t bytes, struct iov_iter *i) | ||||||
| { | { | ||||||
|  | 	if (unlikely(i->type & ITER_PIPE)) | ||||||
|  | 		return pipe_zero(bytes, i); | ||||||
| 	iterate_and_advance(i, bytes, v, | 	iterate_and_advance(i, bytes, v, | ||||||
| 		__clear_user(v.iov_base, v.iov_len), | 		__clear_user(v.iov_base, v.iov_len), | ||||||
| 		memzero_page(v.bv_page, v.bv_offset, v.bv_len), | 		memzero_page(v.bv_page, v.bv_offset, v.bv_len), | ||||||
|  | @ -443,6 +662,11 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, | ||||||
| 		struct iov_iter *i, unsigned long offset, size_t bytes) | 		struct iov_iter *i, unsigned long offset, size_t bytes) | ||||||
| { | { | ||||||
| 	char *kaddr = kmap_atomic(page), *p = kaddr + offset; | 	char *kaddr = kmap_atomic(page), *p = kaddr + offset; | ||||||
|  | 	if (unlikely(i->type & ITER_PIPE)) { | ||||||
|  | 		kunmap_atomic(kaddr); | ||||||
|  | 		WARN_ON(1); | ||||||
|  | 		return 0; | ||||||
|  | 	} | ||||||
| 	iterate_all_kinds(i, bytes, v, | 	iterate_all_kinds(i, bytes, v, | ||||||
| 		__copy_from_user_inatomic((p += v.iov_len) - v.iov_len, | 		__copy_from_user_inatomic((p += v.iov_len) - v.iov_len, | ||||||
| 					  v.iov_base, v.iov_len), | 					  v.iov_base, v.iov_len), | ||||||
|  | @ -455,8 +679,51 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); | EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); | ||||||
| 
 | 
 | ||||||
|  | static void pipe_advance(struct iov_iter *i, size_t size) | ||||||
|  | { | ||||||
|  | 	struct pipe_inode_info *pipe = i->pipe; | ||||||
|  | 	struct pipe_buffer *buf; | ||||||
|  | 	int idx = i->idx; | ||||||
|  | 	size_t off = i->iov_offset; | ||||||
|  | 	 | ||||||
|  | 	if (unlikely(i->count < size)) | ||||||
|  | 		size = i->count; | ||||||
|  | 
 | ||||||
|  | 	if (size) { | ||||||
|  | 		if (off) /* make it relative to the beginning of buffer */ | ||||||
|  | 			size += off - pipe->bufs[idx].offset; | ||||||
|  | 		while (1) { | ||||||
|  | 			buf = &pipe->bufs[idx]; | ||||||
|  | 			if (size <= buf->len) | ||||||
|  | 				break; | ||||||
|  | 			size -= buf->len; | ||||||
|  | 			idx = next_idx(idx, pipe); | ||||||
|  | 		} | ||||||
|  | 		buf->len = size; | ||||||
|  | 		i->idx = idx; | ||||||
|  | 		off = i->iov_offset = buf->offset + size; | ||||||
|  | 	} | ||||||
|  | 	if (off) | ||||||
|  | 		idx = next_idx(idx, pipe); | ||||||
|  | 	if (pipe->nrbufs) { | ||||||
|  | 		int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); | ||||||
|  | 		/* [curbuf,unused) is in use.  Free [idx,unused) */ | ||||||
|  | 		while (idx != unused) { | ||||||
|  | 			buf = &pipe->bufs[idx]; | ||||||
|  | 			buf->ops->release(pipe, buf); | ||||||
|  | 			buf->ops = NULL; | ||||||
|  | 			idx = next_idx(idx, pipe); | ||||||
|  | 			pipe->nrbufs--; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void iov_iter_advance(struct iov_iter *i, size_t size) | void iov_iter_advance(struct iov_iter *i, size_t size) | ||||||
| { | { | ||||||
|  | 	if (unlikely(i->type & ITER_PIPE)) { | ||||||
|  | 		pipe_advance(i, size); | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
| 	iterate_and_advance(i, size, v, 0, 0, 0) | 	iterate_and_advance(i, size, v, 0, 0, 0) | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(iov_iter_advance); | EXPORT_SYMBOL(iov_iter_advance); | ||||||
|  | @ -466,6 +733,8 @@ EXPORT_SYMBOL(iov_iter_advance); | ||||||
|  */ |  */ | ||||||
| size_t iov_iter_single_seg_count(const struct iov_iter *i) | size_t iov_iter_single_seg_count(const struct iov_iter *i) | ||||||
| { | { | ||||||
|  | 	if (unlikely(i->type & ITER_PIPE)) | ||||||
|  | 		return i->count;	// it is a silly place, anyway
 | ||||||
| 	if (i->nr_segs == 1) | 	if (i->nr_segs == 1) | ||||||
| 		return i->count; | 		return i->count; | ||||||
| 	else if (i->type & ITER_BVEC) | 	else if (i->type & ITER_BVEC) | ||||||
|  | @ -501,6 +770,19 @@ void iov_iter_bvec(struct iov_iter *i, int direction, | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(iov_iter_bvec); | EXPORT_SYMBOL(iov_iter_bvec); | ||||||
| 
 | 
 | ||||||
|  | void iov_iter_pipe(struct iov_iter *i, int direction, | ||||||
|  | 			struct pipe_inode_info *pipe, | ||||||
|  | 			size_t count) | ||||||
|  | { | ||||||
|  | 	BUG_ON(direction != ITER_PIPE); | ||||||
|  | 	i->type = direction; | ||||||
|  | 	i->pipe = pipe; | ||||||
|  | 	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); | ||||||
|  | 	i->iov_offset = 0; | ||||||
|  | 	i->count = count; | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(iov_iter_pipe); | ||||||
|  | 
 | ||||||
| unsigned long iov_iter_alignment(const struct iov_iter *i) | unsigned long iov_iter_alignment(const struct iov_iter *i) | ||||||
| { | { | ||||||
| 	unsigned long res = 0; | 	unsigned long res = 0; | ||||||
|  | @ -509,6 +791,11 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) | ||||||
| 	if (!size) | 	if (!size) | ||||||
| 		return 0; | 		return 0; | ||||||
| 
 | 
 | ||||||
|  | 	if (unlikely(i->type & ITER_PIPE)) { | ||||||
|  | 		if (i->iov_offset && allocated(&i->pipe->bufs[i->idx])) | ||||||
|  | 			return size | i->iov_offset; | ||||||
|  | 		return size; | ||||||
|  | 	} | ||||||
| 	iterate_all_kinds(i, size, v, | 	iterate_all_kinds(i, size, v, | ||||||
| 		(res |= (unsigned long)v.iov_base | v.iov_len, 0), | 		(res |= (unsigned long)v.iov_base | v.iov_len, 0), | ||||||
| 		res |= v.bv_offset | v.bv_len, | 		res |= v.bv_offset | v.bv_len, | ||||||
|  | @ -525,6 +812,11 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) | ||||||
| 	if (!size) | 	if (!size) | ||||||
| 		return 0; | 		return 0; | ||||||
| 
 | 
 | ||||||
|  | 	if (unlikely(i->type & ITER_PIPE)) { | ||||||
|  | 		WARN_ON(1); | ||||||
|  | 		return ~0U; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	iterate_all_kinds(i, size, v, | 	iterate_all_kinds(i, size, v, | ||||||
| 		(res |= (!res ? 0 : (unsigned long)v.iov_base) | | 		(res |= (!res ? 0 : (unsigned long)v.iov_base) | | ||||||
| 			(size != v.iov_len ? size : 0), 0), | 			(size != v.iov_len ? size : 0), 0), | ||||||
|  | @ -537,6 +829,47 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(iov_iter_gap_alignment); | EXPORT_SYMBOL(iov_iter_gap_alignment); | ||||||
| 
 | 
 | ||||||
|  | static inline size_t __pipe_get_pages(struct iov_iter *i, | ||||||
|  | 				size_t maxsize, | ||||||
|  | 				struct page **pages, | ||||||
|  | 				int idx, | ||||||
|  | 				size_t *start) | ||||||
|  | { | ||||||
|  | 	struct pipe_inode_info *pipe = i->pipe; | ||||||
|  | 	size_t n = push_pipe(i, maxsize, &idx, start); | ||||||
|  | 	if (!n) | ||||||
|  | 		return -EFAULT; | ||||||
|  | 
 | ||||||
|  | 	maxsize = n; | ||||||
|  | 	n += *start; | ||||||
|  | 	while (n >= PAGE_SIZE) { | ||||||
|  | 		get_page(*pages++ = pipe->bufs[idx].page); | ||||||
|  | 		idx = next_idx(idx, pipe); | ||||||
|  | 		n -= PAGE_SIZE; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return maxsize; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static ssize_t pipe_get_pages(struct iov_iter *i, | ||||||
|  | 		   struct page **pages, size_t maxsize, unsigned maxpages, | ||||||
|  | 		   size_t *start) | ||||||
|  | { | ||||||
|  | 	unsigned npages; | ||||||
|  | 	size_t capacity; | ||||||
|  | 	int idx; | ||||||
|  | 
 | ||||||
|  | 	if (!sanity(i)) | ||||||
|  | 		return -EFAULT; | ||||||
|  | 
 | ||||||
|  | 	data_start(i, &idx, start); | ||||||
|  | 	/* some of this one + all after this one */ | ||||||
|  | 	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; | ||||||
|  | 	capacity = min(npages,maxpages) * PAGE_SIZE - *start; | ||||||
|  | 
 | ||||||
|  | 	return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| ssize_t iov_iter_get_pages(struct iov_iter *i, | ssize_t iov_iter_get_pages(struct iov_iter *i, | ||||||
| 		   struct page **pages, size_t maxsize, unsigned maxpages, | 		   struct page **pages, size_t maxsize, unsigned maxpages, | ||||||
| 		   size_t *start) | 		   size_t *start) | ||||||
|  | @ -547,6 +880,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, | ||||||
| 	if (!maxsize) | 	if (!maxsize) | ||||||
| 		return 0; | 		return 0; | ||||||
| 
 | 
 | ||||||
|  | 	if (unlikely(i->type & ITER_PIPE)) | ||||||
|  | 		return pipe_get_pages(i, pages, maxsize, maxpages, start); | ||||||
| 	iterate_all_kinds(i, maxsize, v, ({ | 	iterate_all_kinds(i, maxsize, v, ({ | ||||||
| 		unsigned long addr = (unsigned long)v.iov_base; | 		unsigned long addr = (unsigned long)v.iov_base; | ||||||
| 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); | 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); | ||||||
|  | @ -582,6 +917,37 @@ static struct page **get_pages_array(size_t n) | ||||||
| 	return p; | 	return p; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static ssize_t pipe_get_pages_alloc(struct iov_iter *i, | ||||||
|  | 		   struct page ***pages, size_t maxsize, | ||||||
|  | 		   size_t *start) | ||||||
|  | { | ||||||
|  | 	struct page **p; | ||||||
|  | 	size_t n; | ||||||
|  | 	int idx; | ||||||
|  | 	int npages; | ||||||
|  | 
 | ||||||
|  | 	if (!sanity(i)) | ||||||
|  | 		return -EFAULT; | ||||||
|  | 
 | ||||||
|  | 	data_start(i, &idx, start); | ||||||
|  | 	/* some of this one + all after this one */ | ||||||
|  | 	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; | ||||||
|  | 	n = npages * PAGE_SIZE - *start; | ||||||
|  | 	if (maxsize > n) | ||||||
|  | 		maxsize = n; | ||||||
|  | 	else | ||||||
|  | 		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); | ||||||
|  | 	p = get_pages_array(npages); | ||||||
|  | 	if (!p) | ||||||
|  | 		return -ENOMEM; | ||||||
|  | 	n = __pipe_get_pages(i, maxsize, p, idx, start); | ||||||
|  | 	if (n > 0) | ||||||
|  | 		*pages = p; | ||||||
|  | 	else | ||||||
|  | 		kvfree(p); | ||||||
|  | 	return n; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, | ||||||
| 		   struct page ***pages, size_t maxsize, | 		   struct page ***pages, size_t maxsize, | ||||||
| 		   size_t *start) | 		   size_t *start) | ||||||
|  | @ -594,6 +960,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, | ||||||
| 	if (!maxsize) | 	if (!maxsize) | ||||||
| 		return 0; | 		return 0; | ||||||
| 
 | 
 | ||||||
|  | 	if (unlikely(i->type & ITER_PIPE)) | ||||||
|  | 		return pipe_get_pages_alloc(i, pages, maxsize, start); | ||||||
| 	iterate_all_kinds(i, maxsize, v, ({ | 	iterate_all_kinds(i, maxsize, v, ({ | ||||||
| 		unsigned long addr = (unsigned long)v.iov_base; | 		unsigned long addr = (unsigned long)v.iov_base; | ||||||
| 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); | 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); | ||||||
|  | @ -635,6 +1003,10 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, | ||||||
| 	__wsum sum, next; | 	__wsum sum, next; | ||||||
| 	size_t off = 0; | 	size_t off = 0; | ||||||
| 	sum = *csum; | 	sum = *csum; | ||||||
|  | 	if (unlikely(i->type & ITER_PIPE)) { | ||||||
|  | 		WARN_ON(1); | ||||||
|  | 		return 0; | ||||||
|  | 	} | ||||||
| 	iterate_and_advance(i, bytes, v, ({ | 	iterate_and_advance(i, bytes, v, ({ | ||||||
| 		int err = 0; | 		int err = 0; | ||||||
| 		next = csum_and_copy_from_user(v.iov_base,  | 		next = csum_and_copy_from_user(v.iov_base,  | ||||||
|  | @ -673,6 +1045,10 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, | ||||||
| 	__wsum sum, next; | 	__wsum sum, next; | ||||||
| 	size_t off = 0; | 	size_t off = 0; | ||||||
| 	sum = *csum; | 	sum = *csum; | ||||||
|  | 	if (unlikely(i->type & ITER_PIPE)) { | ||||||
|  | 		WARN_ON(1);	/* for now */ | ||||||
|  | 		return 0; | ||||||
|  | 	} | ||||||
| 	iterate_and_advance(i, bytes, v, ({ | 	iterate_and_advance(i, bytes, v, ({ | ||||||
| 		int err = 0; | 		int err = 0; | ||||||
| 		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, | 		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, | ||||||
|  | @ -712,7 +1088,20 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) | ||||||
| 	if (!size) | 	if (!size) | ||||||
| 		return 0; | 		return 0; | ||||||
| 
 | 
 | ||||||
| 	iterate_all_kinds(i, size, v, ({ | 	if (unlikely(i->type & ITER_PIPE)) { | ||||||
|  | 		struct pipe_inode_info *pipe = i->pipe; | ||||||
|  | 		size_t off; | ||||||
|  | 		int idx; | ||||||
|  | 
 | ||||||
|  | 		if (!sanity(i)) | ||||||
|  | 			return 0; | ||||||
|  | 
 | ||||||
|  | 		data_start(i, &idx, &off); | ||||||
|  | 		/* some of this one + all after this one */ | ||||||
|  | 		npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1; | ||||||
|  | 		if (npages >= maxpages) | ||||||
|  | 			return maxpages; | ||||||
|  | 	} else iterate_all_kinds(i, size, v, ({ | ||||||
| 		unsigned long p = (unsigned long)v.iov_base; | 		unsigned long p = (unsigned long)v.iov_base; | ||||||
| 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) | 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) | ||||||
| 			- p / PAGE_SIZE; | 			- p / PAGE_SIZE; | ||||||
|  | @ -737,6 +1126,10 @@ EXPORT_SYMBOL(iov_iter_npages); | ||||||
| const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) | const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) | ||||||
| { | { | ||||||
| 	*new = *old; | 	*new = *old; | ||||||
|  | 	if (unlikely(new->type & ITER_PIPE)) { | ||||||
|  | 		WARN_ON(1); | ||||||
|  | 		return NULL; | ||||||
|  | 	} | ||||||
| 	if (new->type & ITER_BVEC) | 	if (new->type & ITER_BVEC) | ||||||
| 		return new->bvec = kmemdup(new->bvec, | 		return new->bvec = kmemdup(new->bvec, | ||||||
| 				    new->nr_segs * sizeof(struct bio_vec), | 				    new->nr_segs * sizeof(struct bio_vec), | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Al Viro
						Al Viro