forked from mirrors/linux
		
	iov_iter: add ITER_BVEC_FLAG_NO_REF flag
For ITER_BVEC, if we're holding on to kernel pages, the caller doesn't need to grab a reference to the bvec pages, and drop that same reference on IO completion. This is essentially safe for any ITER_BVEC, but some use cases end up reusing pages and unconditionally dropping a page reference on completion. An example of that is sendfile(2), which ends up being a splice_in + splice_out on the pipe pages. Add a flag that tells us it's fine to not grab a page reference to the bvec pages, since that caller knows not to drop a reference when it's done with the pages. Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
		
							parent
							
								
									bf33a7699e
								
							
						
					
					
						commit
						875f1d0769
					
				
					 2 changed files with 22 additions and 5 deletions
				
			
		|  | @ -855,6 +855,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, | |||
| 	iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); | ||||
| 	if (offset) | ||||
| 		iov_iter_advance(iter, offset); | ||||
| 
 | ||||
| 	/* don't drop a reference to these pages */ | ||||
| 	iter->type |= ITER_BVEC_FLAG_NO_REF; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -23,14 +23,23 @@ struct kvec { | |||
| }; | ||||
| 
 | ||||
| enum iter_type { | ||||
| 	ITER_IOVEC = 0, | ||||
| 	ITER_KVEC = 2, | ||||
| 	ITER_BVEC = 4, | ||||
| 	ITER_PIPE = 8, | ||||
| 	ITER_DISCARD = 16, | ||||
| 	/* set if ITER_BVEC doesn't hold a bv_page ref */ | ||||
| 	ITER_BVEC_FLAG_NO_REF = 2, | ||||
| 
 | ||||
| 	/* iter types */ | ||||
| 	ITER_IOVEC = 4, | ||||
| 	ITER_KVEC = 8, | ||||
| 	ITER_BVEC = 16, | ||||
| 	ITER_PIPE = 32, | ||||
| 	ITER_DISCARD = 64, | ||||
| }; | ||||
| 
 | ||||
| struct iov_iter { | ||||
| 	/*
 | ||||
| 	 * Bit 0 is the read/write bit, set if we're writing. | ||||
| 	 * Bit 1 is the BVEC_FLAG_NO_REF bit, set if type is a bvec and | ||||
| 	 * the caller isn't expecting to drop a page reference when done. | ||||
| 	 */ | ||||
| 	unsigned int type; | ||||
| 	size_t iov_offset; | ||||
| 	size_t count; | ||||
|  | @ -84,6 +93,11 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i) | |||
| 	return i->type & (READ | WRITE); | ||||
| } | ||||
| 
 | ||||
| static inline bool iov_iter_bvec_no_ref(const struct iov_iter *i) | ||||
| { | ||||
| 	return (i->type & ITER_BVEC_FLAG_NO_REF) != 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Total number of bytes covered by an iovec. | ||||
|  * | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Jens Axboe
						Jens Axboe