forked from mirrors/linux
		
	skbuff: Add pskb_extract() helper function
A pattern of skb usage seen in modules such as RDS-TCP is to extract `to_copy' bytes from the received TCP segment, starting at some offset `off', into a new skb `clone'. This is done in the ->data_ready callback, where the clone skb is queued up for rx on the PF_RDS socket, while the parent TCP segment is returned unchanged back to the TCP engine. The existing code uses the sequence "clone = skb_clone(..); pskb_pull(clone, off, ..); pskb_trim(clone, to_copy, ..);" with the intention of discarding the first `off' bytes. However, skb_clone() + pskb_pull() implies pskb_expand_head(), which ends up doing a redundant memcpy of bytes that will then get discarded in __pskb_pull_tail(). To avoid this inefficiency, this commit adds pskb_extract(), which creates the clone and memcpy's only the relevant header/frag/frag_list to the start of `clone'. pskb_trim() is then invoked to trim clone down to the requested to_copy bytes. Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									557fc4a098
								
							
						
					
					
						commit
						6fa01ccd88
					
				
					 2 changed files with 244 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -2986,6 +2986,8 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
 | 
			
		|||
int skb_ensure_writable(struct sk_buff *skb, int write_len);
 | 
			
		||||
int skb_vlan_pop(struct sk_buff *skb);
 | 
			
		||||
int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
 | 
			
		||||
struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy,
 | 
			
		||||
			     gfp_t gfp);
 | 
			
		||||
 | 
			
		||||
static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4622,3 +4622,245 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 | 
			
		|||
	return NULL;
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL(alloc_skb_with_frags);
 | 
			
		||||
 | 
			
		||||
/* carve out the first off bytes from skb when off < headlen */
 | 
			
		||||
static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
 | 
			
		||||
				    const int headlen, gfp_t gfp_mask)
 | 
			
		||||
{
 | 
			
		||||
	int i;
 | 
			
		||||
	int size = skb_end_offset(skb);
 | 
			
		||||
	int new_hlen = headlen - off;
 | 
			
		||||
	u8 *data;
 | 
			
		||||
	int doff = 0;
 | 
			
		||||
 | 
			
		||||
	size = SKB_DATA_ALIGN(size);
 | 
			
		||||
 | 
			
		||||
	if (skb_pfmemalloc(skb))
 | 
			
		||||
		gfp_mask |= __GFP_MEMALLOC;
 | 
			
		||||
	data = kmalloc_reserve(size +
 | 
			
		||||
			       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
 | 
			
		||||
			       gfp_mask, NUMA_NO_NODE, NULL);
 | 
			
		||||
	if (!data)
 | 
			
		||||
		return -ENOMEM;
 | 
			
		||||
 | 
			
		||||
	size = SKB_WITH_OVERHEAD(ksize(data));
 | 
			
		||||
 | 
			
		||||
	/* Copy real data, and all frags */
 | 
			
		||||
	skb_copy_from_linear_data_offset(skb, off, data, new_hlen);
 | 
			
		||||
	skb->len -= off;
 | 
			
		||||
 | 
			
		||||
	memcpy((struct skb_shared_info *)(data + size),
 | 
			
		||||
	       skb_shinfo(skb),
 | 
			
		||||
	       offsetof(struct skb_shared_info,
 | 
			
		||||
			frags[skb_shinfo(skb)->nr_frags]));
 | 
			
		||||
	if (skb_cloned(skb)) {
 | 
			
		||||
		/* drop the old head gracefully */
 | 
			
		||||
		if (skb_orphan_frags(skb, gfp_mask)) {
 | 
			
		||||
			kfree(data);
 | 
			
		||||
			return -ENOMEM;
 | 
			
		||||
		}
 | 
			
		||||
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 | 
			
		||||
			skb_frag_ref(skb, i);
 | 
			
		||||
		if (skb_has_frag_list(skb))
 | 
			
		||||
			skb_clone_fraglist(skb);
 | 
			
		||||
		skb_release_data(skb);
 | 
			
		||||
	} else {
 | 
			
		||||
		/* we can reuse existing recount- all we did was
 | 
			
		||||
		 * relocate values
 | 
			
		||||
		 */
 | 
			
		||||
		skb_free_head(skb);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	doff = (data - skb->head);
 | 
			
		||||
	skb->head = data;
 | 
			
		||||
	skb->data = data;
 | 
			
		||||
	skb->head_frag = 0;
 | 
			
		||||
#ifdef NET_SKBUFF_DATA_USES_OFFSET
 | 
			
		||||
	skb->end = size;
 | 
			
		||||
	doff = 0;
 | 
			
		||||
#else
 | 
			
		||||
	skb->end = skb->head + size;
 | 
			
		||||
#endif
 | 
			
		||||
	skb_set_tail_pointer(skb, skb_headlen(skb));
 | 
			
		||||
	skb_headers_offset_update(skb, 0);
 | 
			
		||||
	skb->cloned = 0;
 | 
			
		||||
	skb->hdr_len = 0;
 | 
			
		||||
	skb->nohdr = 0;
 | 
			
		||||
	atomic_set(&skb_shinfo(skb)->dataref, 1);
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp);
 | 
			
		||||
 | 
			
		||||
/* carve out the first eat bytes from skb's frag_list. May recurse into
 | 
			
		||||
 * pskb_carve()
 | 
			
		||||
 */
 | 
			
		||||
static int pskb_carve_frag_list(struct sk_buff *skb,
 | 
			
		||||
				struct skb_shared_info *shinfo, int eat,
 | 
			
		||||
				gfp_t gfp_mask)
 | 
			
		||||
{
 | 
			
		||||
	struct sk_buff *list = shinfo->frag_list;
 | 
			
		||||
	struct sk_buff *clone = NULL;
 | 
			
		||||
	struct sk_buff *insp = NULL;
 | 
			
		||||
 | 
			
		||||
	do {
 | 
			
		||||
		if (!list) {
 | 
			
		||||
			pr_err("Not enough bytes to eat. Want %d\n", eat);
 | 
			
		||||
			return -EFAULT;
 | 
			
		||||
		}
 | 
			
		||||
		if (list->len <= eat) {
 | 
			
		||||
			/* Eaten as whole. */
 | 
			
		||||
			eat -= list->len;
 | 
			
		||||
			list = list->next;
 | 
			
		||||
			insp = list;
 | 
			
		||||
		} else {
 | 
			
		||||
			/* Eaten partially. */
 | 
			
		||||
			if (skb_shared(list)) {
 | 
			
		||||
				clone = skb_clone(list, gfp_mask);
 | 
			
		||||
				if (!clone)
 | 
			
		||||
					return -ENOMEM;
 | 
			
		||||
				insp = list->next;
 | 
			
		||||
				list = clone;
 | 
			
		||||
			} else {
 | 
			
		||||
				/* This may be pulled without problems. */
 | 
			
		||||
				insp = list;
 | 
			
		||||
			}
 | 
			
		||||
			if (pskb_carve(list, eat, gfp_mask) < 0) {
 | 
			
		||||
				kfree_skb(clone);
 | 
			
		||||
				return -ENOMEM;
 | 
			
		||||
			}
 | 
			
		||||
			break;
 | 
			
		||||
		}
 | 
			
		||||
	} while (eat);
 | 
			
		||||
 | 
			
		||||
	/* Free pulled out fragments. */
 | 
			
		||||
	while ((list = shinfo->frag_list) != insp) {
 | 
			
		||||
		shinfo->frag_list = list->next;
 | 
			
		||||
		kfree_skb(list);
 | 
			
		||||
	}
 | 
			
		||||
	/* And insert new clone at head. */
 | 
			
		||||
	if (clone) {
 | 
			
		||||
		clone->next = list;
 | 
			
		||||
		shinfo->frag_list = clone;
 | 
			
		||||
	}
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* carve off first len bytes from skb. Split line (off) is in the
 | 
			
		||||
 * non-linear part of skb
 | 
			
		||||
 */
 | 
			
		||||
static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
 | 
			
		||||
				       int pos, gfp_t gfp_mask)
 | 
			
		||||
{
 | 
			
		||||
	int i, k = 0;
 | 
			
		||||
	int size = skb_end_offset(skb);
 | 
			
		||||
	u8 *data;
 | 
			
		||||
	const int nfrags = skb_shinfo(skb)->nr_frags;
 | 
			
		||||
	struct skb_shared_info *shinfo;
 | 
			
		||||
	int doff = 0;
 | 
			
		||||
 | 
			
		||||
	size = SKB_DATA_ALIGN(size);
 | 
			
		||||
 | 
			
		||||
	if (skb_pfmemalloc(skb))
 | 
			
		||||
		gfp_mask |= __GFP_MEMALLOC;
 | 
			
		||||
	data = kmalloc_reserve(size +
 | 
			
		||||
			       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
 | 
			
		||||
			       gfp_mask, NUMA_NO_NODE, NULL);
 | 
			
		||||
	if (!data)
 | 
			
		||||
		return -ENOMEM;
 | 
			
		||||
 | 
			
		||||
	size = SKB_WITH_OVERHEAD(ksize(data));
 | 
			
		||||
 | 
			
		||||
	memcpy((struct skb_shared_info *)(data + size),
 | 
			
		||||
	       skb_shinfo(skb), offsetof(struct skb_shared_info,
 | 
			
		||||
					 frags[skb_shinfo(skb)->nr_frags]));
 | 
			
		||||
	if (skb_orphan_frags(skb, gfp_mask)) {
 | 
			
		||||
		kfree(data);
 | 
			
		||||
		return -ENOMEM;
 | 
			
		||||
	}
 | 
			
		||||
	shinfo = (struct skb_shared_info *)(data + size);
 | 
			
		||||
	for (i = 0; i < nfrags; i++) {
 | 
			
		||||
		int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]);
 | 
			
		||||
 | 
			
		||||
		if (pos + fsize > off) {
 | 
			
		||||
			shinfo->frags[k] = skb_shinfo(skb)->frags[i];
 | 
			
		||||
 | 
			
		||||
			if (pos < off) {
 | 
			
		||||
				/* Split frag.
 | 
			
		||||
				 * We have two variants in this case:
 | 
			
		||||
				 * 1. Move all the frag to the second
 | 
			
		||||
				 *    part, if it is possible. F.e.
 | 
			
		||||
				 *    this approach is mandatory for TUX,
 | 
			
		||||
				 *    where splitting is expensive.
 | 
			
		||||
				 * 2. Split is accurately. We make this.
 | 
			
		||||
				 */
 | 
			
		||||
				shinfo->frags[0].page_offset += off - pos;
 | 
			
		||||
				skb_frag_size_sub(&shinfo->frags[0], off - pos);
 | 
			
		||||
			}
 | 
			
		||||
			skb_frag_ref(skb, i);
 | 
			
		||||
			k++;
 | 
			
		||||
		}
 | 
			
		||||
		pos += fsize;
 | 
			
		||||
	}
 | 
			
		||||
	shinfo->nr_frags = k;
 | 
			
		||||
	if (skb_has_frag_list(skb))
 | 
			
		||||
		skb_clone_fraglist(skb);
 | 
			
		||||
 | 
			
		||||
	if (k == 0) {
 | 
			
		||||
		/* split line is in frag list */
 | 
			
		||||
		pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask);
 | 
			
		||||
	}
 | 
			
		||||
	skb_release_data(skb);
 | 
			
		||||
 | 
			
		||||
	doff = (data - skb->head);
 | 
			
		||||
	skb->head = data;
 | 
			
		||||
	skb->head_frag = 0;
 | 
			
		||||
	skb->data = data;
 | 
			
		||||
#ifdef NET_SKBUFF_DATA_USES_OFFSET
 | 
			
		||||
	skb->end = size;
 | 
			
		||||
	doff = 0;
 | 
			
		||||
#else
 | 
			
		||||
	skb->end = skb->head + size;
 | 
			
		||||
#endif
 | 
			
		||||
	skb_reset_tail_pointer(skb);
 | 
			
		||||
	skb_headers_offset_update(skb, 0);
 | 
			
		||||
	skb->cloned   = 0;
 | 
			
		||||
	skb->hdr_len  = 0;
 | 
			
		||||
	skb->nohdr    = 0;
 | 
			
		||||
	skb->len -= off;
 | 
			
		||||
	skb->data_len = skb->len;
 | 
			
		||||
	atomic_set(&skb_shinfo(skb)->dataref, 1);
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* remove len bytes from the beginning of the skb */
 | 
			
		||||
static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp)
 | 
			
		||||
{
 | 
			
		||||
	int headlen = skb_headlen(skb);
 | 
			
		||||
 | 
			
		||||
	if (len < headlen)
 | 
			
		||||
		return pskb_carve_inside_header(skb, len, headlen, gfp);
 | 
			
		||||
	else
 | 
			
		||||
		return pskb_carve_inside_nonlinear(skb, len, headlen, gfp);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Extract to_copy bytes starting at off from skb, and return this in
 | 
			
		||||
 * a new skb
 | 
			
		||||
 */
 | 
			
		||||
struct sk_buff *pskb_extract(struct sk_buff *skb, int off,
 | 
			
		||||
			     int to_copy, gfp_t gfp)
 | 
			
		||||
{
 | 
			
		||||
	struct sk_buff  *clone = skb_clone(skb, gfp);
 | 
			
		||||
 | 
			
		||||
	if (!clone)
 | 
			
		||||
		return NULL;
 | 
			
		||||
 | 
			
		||||
	if (pskb_carve(clone, off, gfp) < 0 ||
 | 
			
		||||
	    pskb_trim(clone, to_copy)) {
 | 
			
		||||
		kfree_skb(clone);
 | 
			
		||||
		return NULL;
 | 
			
		||||
	}
 | 
			
		||||
	return clone;
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL(pskb_extract);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue