forked from mirrors/linux
		
	sock: add MSG_ZEROCOPY
The kernel supports zerocopy sendmsg in virtio and tap. Expand the infrastructure to support other socket types. Introduce a completion notification channel over the socket error queue. Notifications are returned with ee_origin SO_EE_ORIGIN_ZEROCOPY. ee_errno is 0 to avoid blocking the send/recv path on receiving notifications. Add reference counting, to support the skb split, merge, resize and clone operations possible with SOCK_STREAM and other socket types. The patch does not yet modify any datapaths. Signed-off-by: Willem de Bruijn <willemb@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									3ece782693
								
							
						
					
					
						commit
						52267790ef
					
				
					 7 changed files with 248 additions and 34 deletions
				
			
		| 
						 | 
					@ -429,6 +429,7 @@ enum {
 | 
				
			||||||
	SKBTX_SCHED_TSTAMP = 1 << 6,
 | 
						SKBTX_SCHED_TSTAMP = 1 << 6,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define SKBTX_ZEROCOPY_FRAG	(SKBTX_DEV_ZEROCOPY | SKBTX_SHARED_FRAG)
 | 
				
			||||||
#define SKBTX_ANY_SW_TSTAMP	(SKBTX_SW_TSTAMP    | \
 | 
					#define SKBTX_ANY_SW_TSTAMP	(SKBTX_SW_TSTAMP    | \
 | 
				
			||||||
				 SKBTX_SCHED_TSTAMP)
 | 
									 SKBTX_SCHED_TSTAMP)
 | 
				
			||||||
#define SKBTX_ANY_TSTAMP	(SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
 | 
					#define SKBTX_ANY_TSTAMP	(SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
 | 
				
			||||||
| 
						 | 
					@ -445,8 +446,28 @@ struct ubuf_info {
 | 
				
			||||||
	void (*callback)(struct ubuf_info *, bool zerocopy_success);
 | 
						void (*callback)(struct ubuf_info *, bool zerocopy_success);
 | 
				
			||||||
	void *ctx;
 | 
						void *ctx;
 | 
				
			||||||
	unsigned long desc;
 | 
						unsigned long desc;
 | 
				
			||||||
 | 
						u16 zerocopy:1;
 | 
				
			||||||
 | 
						atomic_t refcnt;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define skb_uarg(SKB)	((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void sock_zerocopy_get(struct ubuf_info *uarg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						atomic_inc(&uarg->refcnt);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void sock_zerocopy_put(struct ubuf_info *uarg);
 | 
				
			||||||
 | 
					void sock_zerocopy_put_abort(struct ubuf_info *uarg);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void sock_zerocopy_callback(struct ubuf_info *uarg, bool success);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
 | 
				
			||||||
 | 
								     struct msghdr *msg, int len,
 | 
				
			||||||
 | 
								     struct ubuf_info *uarg);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* This data is invariant across clones and lives at
 | 
					/* This data is invariant across clones and lives at
 | 
				
			||||||
 * the end of the header data, ie. at skb->end.
 | 
					 * the end of the header data, ie. at skb->end.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
| 
						 | 
					@ -1214,6 +1235,45 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb)
 | 
				
			||||||
	return &skb_shinfo(skb)->hwtstamps;
 | 
						return &skb_shinfo(skb)->hwtstamps;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						bool is_zcopy = skb && skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return is_zcopy ? skb_uarg(skb) : NULL;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (skb && uarg && !skb_zcopy(skb)) {
 | 
				
			||||||
 | 
							sock_zerocopy_get(uarg);
 | 
				
			||||||
 | 
							skb_shinfo(skb)->destructor_arg = uarg;
 | 
				
			||||||
 | 
							skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Release a reference on a zerocopy structure */
 | 
				
			||||||
 | 
					static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct ubuf_info *uarg = skb_zcopy(skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (uarg) {
 | 
				
			||||||
 | 
							uarg->zerocopy = uarg->zerocopy && zerocopy;
 | 
				
			||||||
 | 
							sock_zerocopy_put(uarg);
 | 
				
			||||||
 | 
							skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Abort a zerocopy operation and revert zckey on error in send syscall */
 | 
				
			||||||
 | 
					static inline void skb_zcopy_abort(struct sk_buff *skb)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct ubuf_info *uarg = skb_zcopy(skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (uarg) {
 | 
				
			||||||
 | 
							sock_zerocopy_put_abort(uarg);
 | 
				
			||||||
 | 
							skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 *	skb_queue_empty - check if a queue is empty
 | 
					 *	skb_queue_empty - check if a queue is empty
 | 
				
			||||||
 *	@list: queue head
 | 
					 *	@list: queue head
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -287,6 +287,7 @@ struct ucred {
 | 
				
			||||||
#define MSG_BATCH	0x40000 /* sendmmsg(): more messages coming */
 | 
					#define MSG_BATCH	0x40000 /* sendmmsg(): more messages coming */
 | 
				
			||||||
#define MSG_EOF         MSG_FIN
 | 
					#define MSG_EOF         MSG_FIN
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define MSG_ZEROCOPY	0x4000000	/* Use user data in kernel path */
 | 
				
			||||||
#define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
 | 
					#define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
 | 
				
			||||||
#define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exec for file
 | 
					#define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exec for file
 | 
				
			||||||
					   descriptor received through
 | 
										   descriptor received through
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -294,6 +294,7 @@ struct sock_common {
 | 
				
			||||||
  *	@sk_stamp: time stamp of last packet received
 | 
					  *	@sk_stamp: time stamp of last packet received
 | 
				
			||||||
  *	@sk_tsflags: SO_TIMESTAMPING socket options
 | 
					  *	@sk_tsflags: SO_TIMESTAMPING socket options
 | 
				
			||||||
  *	@sk_tskey: counter to disambiguate concurrent tstamp requests
 | 
					  *	@sk_tskey: counter to disambiguate concurrent tstamp requests
 | 
				
			||||||
 | 
					  *	@sk_zckey: counter to order MSG_ZEROCOPY notifications
 | 
				
			||||||
  *	@sk_socket: Identd and reporting IO signals
 | 
					  *	@sk_socket: Identd and reporting IO signals
 | 
				
			||||||
  *	@sk_user_data: RPC layer private data
 | 
					  *	@sk_user_data: RPC layer private data
 | 
				
			||||||
  *	@sk_frag: cached page frag
 | 
					  *	@sk_frag: cached page frag
 | 
				
			||||||
| 
						 | 
					@ -462,6 +463,7 @@ struct sock {
 | 
				
			||||||
	u16			sk_tsflags;
 | 
						u16			sk_tsflags;
 | 
				
			||||||
	u8			sk_shutdown;
 | 
						u8			sk_shutdown;
 | 
				
			||||||
	u32			sk_tskey;
 | 
						u32			sk_tskey;
 | 
				
			||||||
 | 
						atomic_t		sk_zckey;
 | 
				
			||||||
	struct socket		*sk_socket;
 | 
						struct socket		*sk_socket;
 | 
				
			||||||
	void			*sk_user_data;
 | 
						void			*sk_user_data;
 | 
				
			||||||
#ifdef CONFIG_SECURITY
 | 
					#ifdef CONFIG_SECURITY
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -18,10 +18,13 @@ struct sock_extended_err {
 | 
				
			||||||
#define SO_EE_ORIGIN_ICMP	2
 | 
					#define SO_EE_ORIGIN_ICMP	2
 | 
				
			||||||
#define SO_EE_ORIGIN_ICMP6	3
 | 
					#define SO_EE_ORIGIN_ICMP6	3
 | 
				
			||||||
#define SO_EE_ORIGIN_TXSTATUS	4
 | 
					#define SO_EE_ORIGIN_TXSTATUS	4
 | 
				
			||||||
 | 
					#define SO_EE_ORIGIN_ZEROCOPY	5
 | 
				
			||||||
#define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 | 
					#define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))
 | 
					#define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define SO_EE_CODE_ZEROCOPY_COPIED	1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 *	struct scm_timestamping - timestamps exposed through cmsg
 | 
					 *	struct scm_timestamping - timestamps exposed through cmsg
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -573,6 +573,51 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
 | 
					EXPORT_SYMBOL(skb_copy_datagram_from_iter);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Append up to @length bytes of user pages from @from to @skb as page
					 * frags, starting after the frags @skb already has.
					 *
					 * Memory accounting: when @sk is non-NULL and of type SOCK_STREAM the
					 * truesize is added to sk_wmem_queued and charged with sk_mem_charge();
					 * otherwise it is added to skb->sk->sk_wmem_alloc.
					 *
					 * Returns 0 on success, -EMSGSIZE when all MAX_SKB_FRAGS slots are in
					 * use, or -EFAULT when iov_iter_get_pages() reports an error.
					 */
					int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
								    struct iov_iter *from, size_t length)
					{
						int next_frag = skb_shinfo(skb)->nr_frags;

						while (length && iov_iter_count(from)) {
							struct page *pages[MAX_SKB_FRAGS];
							unsigned long truesize;
							ssize_t got;
							size_t offset;
							int idx;

							if (next_frag == MAX_SKB_FRAGS)
								return -EMSGSIZE;

							got = iov_iter_get_pages(from, pages, length,
										 MAX_SKB_FRAGS - next_frag, &offset);
							if (got < 0)
								return -EFAULT;

							iov_iter_advance(from, got);
							length -= got;

							/* account for whole pages, including the leading offset */
							truesize = PAGE_ALIGN(got + offset);
							skb->data_len += got;
							skb->len += got;
							skb->truesize += truesize;

							if (!sk || sk->sk_type != SOCK_STREAM) {
								refcount_add(truesize, &skb->sk->sk_wmem_alloc);
							} else {
								sk->sk_wmem_queued += truesize;
								sk_mem_charge(sk, truesize);
							}

							/* only the first page of a batch starts at a non-zero offset */
							for (idx = 0; got; idx++) {
								int size = min_t(int, got, PAGE_SIZE - offset);

								skb_fill_page_desc(skb, next_frag++, pages[idx], offset, size);
								offset = 0;
								got -= size;
							}
						}

						return 0;
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(__zerocopy_sg_from_iter);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 | 
					 *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 | 
				
			||||||
 *	@skb: buffer to copy
 | 
					 *	@skb: buffer to copy
 | 
				
			||||||
| 
						 | 
					@ -585,45 +630,13 @@ EXPORT_SYMBOL(skb_copy_datagram_from_iter);
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
 | 
					int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int len = iov_iter_count(from);
 | 
						int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));
 | 
				
			||||||
	int copy = min_t(int, skb_headlen(skb), len);
 | 
					 | 
				
			||||||
	int frag = 0;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* copy up to skb headlen */
 | 
						/* copy up to skb headlen */
 | 
				
			||||||
	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
 | 
						if (skb_copy_datagram_from_iter(skb, 0, from, copy))
 | 
				
			||||||
		return -EFAULT;
 | 
							return -EFAULT;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	while (iov_iter_count(from)) {
 | 
						return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
 | 
				
			||||||
		struct page *pages[MAX_SKB_FRAGS];
 | 
					 | 
				
			||||||
		size_t start;
 | 
					 | 
				
			||||||
		ssize_t copied;
 | 
					 | 
				
			||||||
		unsigned long truesize;
 | 
					 | 
				
			||||||
		int n = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		if (frag == MAX_SKB_FRAGS)
 | 
					 | 
				
			||||||
			return -EMSGSIZE;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		copied = iov_iter_get_pages(from, pages, ~0U,
 | 
					 | 
				
			||||||
					    MAX_SKB_FRAGS - frag, &start);
 | 
					 | 
				
			||||||
		if (copied < 0)
 | 
					 | 
				
			||||||
			return -EFAULT;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		iov_iter_advance(from, copied);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		truesize = PAGE_ALIGN(copied + start);
 | 
					 | 
				
			||||||
		skb->data_len += copied;
 | 
					 | 
				
			||||||
		skb->len += copied;
 | 
					 | 
				
			||||||
		skb->truesize += truesize;
 | 
					 | 
				
			||||||
		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
 | 
					 | 
				
			||||||
		while (copied) {
 | 
					 | 
				
			||||||
			int size = min_t(int, copied, PAGE_SIZE - start);
 | 
					 | 
				
			||||||
			skb_fill_page_desc(skb, frag++, pages[n], start, size);
 | 
					 | 
				
			||||||
			start = 0;
 | 
					 | 
				
			||||||
			copied -= size;
 | 
					 | 
				
			||||||
			n++;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return 0;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL(zerocopy_sg_from_iter);
 | 
					EXPORT_SYMBOL(zerocopy_sg_from_iter);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -915,6 +915,139 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL_GPL(skb_morph);
 | 
					EXPORT_SYMBOL_GPL(skb_morph);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Allocate a zerocopy completion descriptor for @sk.
					 *
					 * The ubuf_info is stored in the cb area of an skb obtained from
					 * sock_omalloc(). Its desc is set to the value of sk->sk_zckey before
					 * the increment, its refcnt starts at zero and a reference is taken on
					 * @sk. @size is not used here.
					 *
					 * Returns the new ubuf_info, or NULL when the skb allocation fails.
					 */
					struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
					{
						struct sk_buff *notif_skb;
						struct ubuf_info *uarg;

						WARN_ON_ONCE(!in_task());

						notif_skb = sock_omalloc(sk, 0, GFP_KERNEL);
						if (!notif_skb)
							return NULL;

						/* the descriptor must fit inside the skb control buffer */
						BUILD_BUG_ON(sizeof(*uarg) > sizeof(notif_skb->cb));
						uarg = (struct ubuf_info *)notif_skb->cb;

						uarg->callback = sock_zerocopy_callback;
						uarg->desc = atomic_inc_return(&sk->sk_zckey) - 1;
						uarg->zerocopy = 1;
						atomic_set(&uarg->refcnt, 0);

						sock_hold(sk);

						return uarg;
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(sock_zerocopy_alloc);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return container_of((void *)uarg, struct sk_buff, cb);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Completion callback for a ubuf_info created by sock_zerocopy_alloc().
					 *
					 * @uarg:    descriptor embedded in the notification skb's cb area
					 * @success: false when the data was not sent zerocopy; reported to
					 *           the user as SO_EE_CODE_ZEROCOPY_COPIED in ee_code
					 *
					 * Unless the socket is SOCK_DEAD, the notification skb is queued on
					 * sk_error_queue with ee_origin SO_EE_ORIGIN_ZEROCOPY, ee_errno 0 and
					 * ee_data set to the descriptor id, and sk_error_report() is invoked.
					 * On a dead socket the skb is freed instead. In both cases the socket
					 * reference taken in sock_zerocopy_alloc() is dropped.
					 */
					void sock_zerocopy_callback(struct ubuf_info *uarg, bool success)
					{
						struct sk_buff *skb = skb_from_uarg(uarg);
						struct sock_exterr_skb *serr;
						struct sock *sk = skb->sk;
						/* Keep the full 32-bit id: desc is derived from the atomic_t
						 * sk_zckey counter, and a u16 would wrap after 65536 sends.
						 *
						 * NOTE(review): the id is read before the memset() below because
						 * serr presumably aliases skb->cb, which also holds uarg -
						 * confirm against the SKB_EXT_ERR definition.
						 */
						u32 id = uarg->desc;

						if (sock_flag(sk, SOCK_DEAD))
							goto release;

						serr = SKB_EXT_ERR(skb);
						memset(serr, 0, sizeof(*serr));
						serr->ee.ee_errno = 0;
						serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
						serr->ee.ee_data = id;
						if (!success)
							serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;

						skb_queue_tail(&sk->sk_error_queue, skb);
						/* ownership passed to the error queue; nothing left to free */
						skb = NULL;

						sk->sk_error_report(sk);

					release:
						consume_skb(skb);
						sock_put(sk);
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(sock_zerocopy_callback);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void sock_zerocopy_put(struct ubuf_info *uarg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (uarg && atomic_dec_and_test(&uarg->refcnt)) {
 | 
				
			||||||
 | 
							if (uarg->callback)
 | 
				
			||||||
 | 
								uarg->callback(uarg, uarg->zerocopy);
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
 | 
								consume_skb(skb_from_uarg(uarg));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(sock_zerocopy_put);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void sock_zerocopy_put_abort(struct ubuf_info *uarg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (uarg) {
 | 
				
			||||||
 | 
							struct sock *sk = skb_from_uarg(uarg)->sk;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							atomic_dec(&sk->sk_zckey);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* sock_zerocopy_put expects a ref. Most sockets take one per
 | 
				
			||||||
 | 
							 * skb, which is zero on abort. tcp_sendmsg holds one extra, to
 | 
				
			||||||
 | 
							 * avoid an skb send inside the main loop triggering uarg free.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							if (sk->sk_type != SOCK_STREAM)
 | 
				
			||||||
 | 
								atomic_inc(&uarg->refcnt);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							sock_zerocopy_put(uarg);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
 | 
				
			||||||
 | 
									   struct iov_iter *from, size_t length);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
 | 
				
			||||||
 | 
								     struct msghdr *msg, int len,
 | 
				
			||||||
 | 
								     struct ubuf_info *uarg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct iov_iter orig_iter = msg->msg_iter;
 | 
				
			||||||
 | 
						int err, orig_len = skb->len;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
 | 
				
			||||||
 | 
						if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
 | 
				
			||||||
 | 
							/* Streams do not free skb on error. Reset to prev state. */
 | 
				
			||||||
 | 
							msg->msg_iter = orig_iter;
 | 
				
			||||||
 | 
							___pskb_trim(skb, orig_len);
 | 
				
			||||||
 | 
							return err;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						skb_zcopy_set(skb, uarg);
 | 
				
			||||||
 | 
						return skb->len - orig_len;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* unused only until next patch in the series; will remove attribute */
 | 
				
			||||||
 | 
					static int __attribute__((unused))
 | 
				
			||||||
 | 
						   skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
 | 
				
			||||||
 | 
								      gfp_t gfp_mask)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (skb_zcopy(orig)) {
 | 
				
			||||||
 | 
							if (skb_zcopy(nskb)) {
 | 
				
			||||||
 | 
								/* !gfp_mask callers are verified to !skb_zcopy(nskb) */
 | 
				
			||||||
 | 
								if (!gfp_mask) {
 | 
				
			||||||
 | 
									WARN_ON_ONCE(1);
 | 
				
			||||||
 | 
									return -ENOMEM;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								if (skb_uarg(nskb) == skb_uarg(orig))
 | 
				
			||||||
 | 
									return 0;
 | 
				
			||||||
 | 
								if (skb_copy_ubufs(nskb, GFP_ATOMIC))
 | 
				
			||||||
 | 
									return -EIO;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							skb_zcopy_set(nskb, skb_uarg(orig));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 *	skb_copy_ubufs	-	copy userspace skb frags buffers to kernel
 | 
					 *	skb_copy_ubufs	-	copy userspace skb frags buffers to kernel
 | 
				
			||||||
 *	@skb: the skb to modify
 | 
					 *	@skb: the skb to modify
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1670,6 +1670,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 | 
				
			||||||
		atomic_set(&newsk->sk_drops, 0);
 | 
							atomic_set(&newsk->sk_drops, 0);
 | 
				
			||||||
		newsk->sk_send_head	= NULL;
 | 
							newsk->sk_send_head	= NULL;
 | 
				
			||||||
		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
 | 
							newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
 | 
				
			||||||
 | 
							atomic_set(&newsk->sk_zckey, 0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		sock_reset_flag(newsk, SOCK_DONE);
 | 
							sock_reset_flag(newsk, SOCK_DONE);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2722,6 +2723,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 | 
				
			||||||
	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
 | 
						sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	sk->sk_stamp = SK_DEFAULT_STAMP;
 | 
						sk->sk_stamp = SK_DEFAULT_STAMP;
 | 
				
			||||||
 | 
						atomic_set(&sk->sk_zckey, 0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_NET_RX_BUSY_POLL
 | 
					#ifdef CONFIG_NET_RX_BUSY_POLL
 | 
				
			||||||
	sk->sk_napi_id		=	0;
 | 
						sk->sk_napi_id		=	0;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue