forked from mirrors/linux
		
	skbuff: Add pskb_extract() helper function
A pattern of skb usage seen in modules such as RDS-TCP is to extract `to_copy' bytes from the received TCP segment, starting at some offset `off' into a new skb `clone'. This is done in the ->data_ready callback, where the clone skb is queued up for rx on the PF_RDS socket, while the parent TCP segment is returned unchanged back to the TCP engine. The existing code uses the sequence clone = skb_clone(..); pskb_pull(clone, off, ..); pskb_trim(clone, to_copy, ..); with the intention of discarding the first `off' bytes. However, skb_clone() + pskb_pull() implies pskb_expand_head(), which ends up doing a redundant memcpy of bytes that will then get discarded in __pskb_pull_tail(). To avoid this inefficiency, this commit adds pskb_extract() that creates the clone, and memcpy's only the relevant header/frag/frag_list to the start of `clone'. pskb_trim() is then invoked to trim clone down to the requested to_copy bytes. Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									557fc4a098
								
							
						
					
					
						commit
						6fa01ccd88
					
				
					 2 changed files with 244 additions and 0 deletions
				
			
		| 
						 | 
					@ -2986,6 +2986,8 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
 | 
				
			||||||
int skb_ensure_writable(struct sk_buff *skb, int write_len);
 | 
					int skb_ensure_writable(struct sk_buff *skb, int write_len);
 | 
				
			||||||
int skb_vlan_pop(struct sk_buff *skb);
 | 
					int skb_vlan_pop(struct sk_buff *skb);
 | 
				
			||||||
int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
 | 
					int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
 | 
				
			||||||
 | 
					struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy,
 | 
				
			||||||
 | 
								     gfp_t gfp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len)
 | 
					static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4622,3 +4622,245 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 | 
				
			||||||
	return NULL;
 | 
						return NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL(alloc_skb_with_frags);
 | 
					EXPORT_SYMBOL(alloc_skb_with_frags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* carve out the first off bytes from skb when off < headlen */
 | 
				
			||||||
 | 
					static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
 | 
				
			||||||
 | 
									    const int headlen, gfp_t gfp_mask)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
						int size = skb_end_offset(skb);
 | 
				
			||||||
 | 
						int new_hlen = headlen - off;
 | 
				
			||||||
 | 
						u8 *data;
 | 
				
			||||||
 | 
						int doff = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						size = SKB_DATA_ALIGN(size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (skb_pfmemalloc(skb))
 | 
				
			||||||
 | 
							gfp_mask |= __GFP_MEMALLOC;
 | 
				
			||||||
 | 
						data = kmalloc_reserve(size +
 | 
				
			||||||
 | 
								       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
 | 
				
			||||||
 | 
								       gfp_mask, NUMA_NO_NODE, NULL);
 | 
				
			||||||
 | 
						if (!data)
 | 
				
			||||||
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						size = SKB_WITH_OVERHEAD(ksize(data));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Copy real data, and all frags */
 | 
				
			||||||
 | 
						skb_copy_from_linear_data_offset(skb, off, data, new_hlen);
 | 
				
			||||||
 | 
						skb->len -= off;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						memcpy((struct skb_shared_info *)(data + size),
 | 
				
			||||||
 | 
						       skb_shinfo(skb),
 | 
				
			||||||
 | 
						       offsetof(struct skb_shared_info,
 | 
				
			||||||
 | 
								frags[skb_shinfo(skb)->nr_frags]));
 | 
				
			||||||
 | 
						if (skb_cloned(skb)) {
 | 
				
			||||||
 | 
							/* drop the old head gracefully */
 | 
				
			||||||
 | 
							if (skb_orphan_frags(skb, gfp_mask)) {
 | 
				
			||||||
 | 
								kfree(data);
 | 
				
			||||||
 | 
								return -ENOMEM;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 | 
				
			||||||
 | 
								skb_frag_ref(skb, i);
 | 
				
			||||||
 | 
							if (skb_has_frag_list(skb))
 | 
				
			||||||
 | 
								skb_clone_fraglist(skb);
 | 
				
			||||||
 | 
							skb_release_data(skb);
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							/* we can reuse existing recount- all we did was
 | 
				
			||||||
 | 
							 * relocate values
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							skb_free_head(skb);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						doff = (data - skb->head);
 | 
				
			||||||
 | 
						skb->head = data;
 | 
				
			||||||
 | 
						skb->data = data;
 | 
				
			||||||
 | 
						skb->head_frag = 0;
 | 
				
			||||||
 | 
					#ifdef NET_SKBUFF_DATA_USES_OFFSET
 | 
				
			||||||
 | 
						skb->end = size;
 | 
				
			||||||
 | 
						doff = 0;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						skb->end = skb->head + size;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
						skb_set_tail_pointer(skb, skb_headlen(skb));
 | 
				
			||||||
 | 
						skb_headers_offset_update(skb, 0);
 | 
				
			||||||
 | 
						skb->cloned = 0;
 | 
				
			||||||
 | 
						skb->hdr_len = 0;
 | 
				
			||||||
 | 
						skb->nohdr = 0;
 | 
				
			||||||
 | 
						atomic_set(&skb_shinfo(skb)->dataref, 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* carve out the first eat bytes from skb's frag_list. May recurse into
 | 
				
			||||||
 | 
					 * pskb_carve()
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static int pskb_carve_frag_list(struct sk_buff *skb,
 | 
				
			||||||
 | 
									struct skb_shared_info *shinfo, int eat,
 | 
				
			||||||
 | 
									gfp_t gfp_mask)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct sk_buff *list = shinfo->frag_list;
 | 
				
			||||||
 | 
						struct sk_buff *clone = NULL;
 | 
				
			||||||
 | 
						struct sk_buff *insp = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						do {
 | 
				
			||||||
 | 
							if (!list) {
 | 
				
			||||||
 | 
								pr_err("Not enough bytes to eat. Want %d\n", eat);
 | 
				
			||||||
 | 
								return -EFAULT;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							if (list->len <= eat) {
 | 
				
			||||||
 | 
								/* Eaten as whole. */
 | 
				
			||||||
 | 
								eat -= list->len;
 | 
				
			||||||
 | 
								list = list->next;
 | 
				
			||||||
 | 
								insp = list;
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								/* Eaten partially. */
 | 
				
			||||||
 | 
								if (skb_shared(list)) {
 | 
				
			||||||
 | 
									clone = skb_clone(list, gfp_mask);
 | 
				
			||||||
 | 
									if (!clone)
 | 
				
			||||||
 | 
										return -ENOMEM;
 | 
				
			||||||
 | 
									insp = list->next;
 | 
				
			||||||
 | 
									list = clone;
 | 
				
			||||||
 | 
								} else {
 | 
				
			||||||
 | 
									/* This may be pulled without problems. */
 | 
				
			||||||
 | 
									insp = list;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								if (pskb_carve(list, eat, gfp_mask) < 0) {
 | 
				
			||||||
 | 
									kfree_skb(clone);
 | 
				
			||||||
 | 
									return -ENOMEM;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						} while (eat);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Free pulled out fragments. */
 | 
				
			||||||
 | 
						while ((list = shinfo->frag_list) != insp) {
 | 
				
			||||||
 | 
							shinfo->frag_list = list->next;
 | 
				
			||||||
 | 
							kfree_skb(list);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						/* And insert new clone at head. */
 | 
				
			||||||
 | 
						if (clone) {
 | 
				
			||||||
 | 
							clone->next = list;
 | 
				
			||||||
 | 
							shinfo->frag_list = clone;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* carve off first len bytes from skb. Split line (off) is in the
 | 
				
			||||||
 | 
					 * non-linear part of skb
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
 | 
				
			||||||
 | 
									       int pos, gfp_t gfp_mask)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int i, k = 0;
 | 
				
			||||||
 | 
						int size = skb_end_offset(skb);
 | 
				
			||||||
 | 
						u8 *data;
 | 
				
			||||||
 | 
						const int nfrags = skb_shinfo(skb)->nr_frags;
 | 
				
			||||||
 | 
						struct skb_shared_info *shinfo;
 | 
				
			||||||
 | 
						int doff = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						size = SKB_DATA_ALIGN(size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (skb_pfmemalloc(skb))
 | 
				
			||||||
 | 
							gfp_mask |= __GFP_MEMALLOC;
 | 
				
			||||||
 | 
						data = kmalloc_reserve(size +
 | 
				
			||||||
 | 
								       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
 | 
				
			||||||
 | 
								       gfp_mask, NUMA_NO_NODE, NULL);
 | 
				
			||||||
 | 
						if (!data)
 | 
				
			||||||
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						size = SKB_WITH_OVERHEAD(ksize(data));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						memcpy((struct skb_shared_info *)(data + size),
 | 
				
			||||||
 | 
						       skb_shinfo(skb), offsetof(struct skb_shared_info,
 | 
				
			||||||
 | 
										 frags[skb_shinfo(skb)->nr_frags]));
 | 
				
			||||||
 | 
						if (skb_orphan_frags(skb, gfp_mask)) {
 | 
				
			||||||
 | 
							kfree(data);
 | 
				
			||||||
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						shinfo = (struct skb_shared_info *)(data + size);
 | 
				
			||||||
 | 
						for (i = 0; i < nfrags; i++) {
 | 
				
			||||||
 | 
							int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (pos + fsize > off) {
 | 
				
			||||||
 | 
								shinfo->frags[k] = skb_shinfo(skb)->frags[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								if (pos < off) {
 | 
				
			||||||
 | 
									/* Split frag.
 | 
				
			||||||
 | 
									 * We have two variants in this case:
 | 
				
			||||||
 | 
									 * 1. Move all the frag to the second
 | 
				
			||||||
 | 
									 *    part, if it is possible. F.e.
 | 
				
			||||||
 | 
									 *    this approach is mandatory for TUX,
 | 
				
			||||||
 | 
									 *    where splitting is expensive.
 | 
				
			||||||
 | 
									 * 2. Split is accurately. We make this.
 | 
				
			||||||
 | 
									 */
 | 
				
			||||||
 | 
									shinfo->frags[0].page_offset += off - pos;
 | 
				
			||||||
 | 
									skb_frag_size_sub(&shinfo->frags[0], off - pos);
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								skb_frag_ref(skb, i);
 | 
				
			||||||
 | 
								k++;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							pos += fsize;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						shinfo->nr_frags = k;
 | 
				
			||||||
 | 
						if (skb_has_frag_list(skb))
 | 
				
			||||||
 | 
							skb_clone_fraglist(skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (k == 0) {
 | 
				
			||||||
 | 
							/* split line is in frag list */
 | 
				
			||||||
 | 
							pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						skb_release_data(skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						doff = (data - skb->head);
 | 
				
			||||||
 | 
						skb->head = data;
 | 
				
			||||||
 | 
						skb->head_frag = 0;
 | 
				
			||||||
 | 
						skb->data = data;
 | 
				
			||||||
 | 
					#ifdef NET_SKBUFF_DATA_USES_OFFSET
 | 
				
			||||||
 | 
						skb->end = size;
 | 
				
			||||||
 | 
						doff = 0;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						skb->end = skb->head + size;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
						skb_reset_tail_pointer(skb);
 | 
				
			||||||
 | 
						skb_headers_offset_update(skb, 0);
 | 
				
			||||||
 | 
						skb->cloned   = 0;
 | 
				
			||||||
 | 
						skb->hdr_len  = 0;
 | 
				
			||||||
 | 
						skb->nohdr    = 0;
 | 
				
			||||||
 | 
						skb->len -= off;
 | 
				
			||||||
 | 
						skb->data_len = skb->len;
 | 
				
			||||||
 | 
						atomic_set(&skb_shinfo(skb)->dataref, 1);
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* remove len bytes from the beginning of the skb */
 | 
				
			||||||
 | 
					static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int headlen = skb_headlen(skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (len < headlen)
 | 
				
			||||||
 | 
							return pskb_carve_inside_header(skb, len, headlen, gfp);
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							return pskb_carve_inside_nonlinear(skb, len, headlen, gfp);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Extract to_copy bytes starting at off from skb, and return this in
 | 
				
			||||||
 | 
					 * a new skb
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					struct sk_buff *pskb_extract(struct sk_buff *skb, int off,
 | 
				
			||||||
 | 
								     int to_copy, gfp_t gfp)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct sk_buff  *clone = skb_clone(skb, gfp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!clone)
 | 
				
			||||||
 | 
							return NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (pskb_carve(clone, off, gfp) < 0 ||
 | 
				
			||||||
 | 
						    pskb_trim(clone, to_copy)) {
 | 
				
			||||||
 | 
							kfree_skb(clone);
 | 
				
			||||||
 | 
							return NULL;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return clone;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(pskb_extract);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue