forked from mirrors/linux
		
	net: sk_buff rbnode reorg
skb->rbnode shares space with skb->next, skb->prev and skb->tstamp. Current uses (TCP receive ofo queue and netem) need to save/restore tstamp, while skb->dev is either NULL (TCP) or a constant for a given queue (netem). Since we plan on using an RB tree for the TCP retransmit queue to speed up SACK processing with large BDP, this patch exchanges skb->dev and skb->tstamp. This saves some overhead in both TCP and netem. v2: removes the swtstamp field from struct tcp_skb_cb. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Soheil Hassas Yeganeh <soheil@google.com> Cc: Wei Wang <weiwan@google.com> Cc: Willem de Bruijn <willemb@google.com> Acked-by: Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									a38b2fa37e
								
							
						
					
					
						commit
						bffa72cf7f
					
				
					 4 changed files with 17 additions and 39 deletions
				
			
		|  | @ -661,8 +661,12 @@ struct sk_buff { | ||||||
| 			struct sk_buff		*prev; | 			struct sk_buff		*prev; | ||||||
| 
 | 
 | ||||||
| 			union { | 			union { | ||||||
| 				ktime_t		tstamp; | 				struct net_device	*dev; | ||||||
| 				u64		skb_mstamp; | 				/* Some protocols might use this space to store information,
 | ||||||
|  | 				 * while device pointer would be NULL. | ||||||
|  | 				 * UDP receive path is one user. | ||||||
|  | 				 */ | ||||||
|  | 				unsigned long		dev_scratch; | ||||||
| 			}; | 			}; | ||||||
| 		}; | 		}; | ||||||
| 		struct rb_node	rbnode; /* used in netem & tcp stack */ | 		struct rb_node	rbnode; /* used in netem & tcp stack */ | ||||||
|  | @ -670,12 +674,8 @@ struct sk_buff { | ||||||
| 	struct sock		*sk; | 	struct sock		*sk; | ||||||
| 
 | 
 | ||||||
| 	union { | 	union { | ||||||
| 		struct net_device	*dev; | 		ktime_t		tstamp; | ||||||
| 		/* Some protocols might use this space to store information,
 | 		u64		skb_mstamp; | ||||||
| 		 * while device pointer would be NULL. |  | ||||||
| 		 * UDP receive path is one user. |  | ||||||
| 		 */ |  | ||||||
| 		unsigned long		dev_scratch; |  | ||||||
| 	}; | 	}; | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * This is the control buffer. It is free to use for every | 	 * This is the control buffer. It is free to use for every | ||||||
|  |  | ||||||
|  | @ -797,12 +797,6 @@ struct tcp_skb_cb { | ||||||
| 			u16	tcp_gso_segs; | 			u16	tcp_gso_segs; | ||||||
| 			u16	tcp_gso_size; | 			u16	tcp_gso_size; | ||||||
| 		}; | 		}; | ||||||
| 
 |  | ||||||
| 		/* Used to stash the receive timestamp while this skb is in the
 |  | ||||||
| 		 * out of order queue, as skb->tstamp is overwritten by the |  | ||||||
| 		 * rbnode. |  | ||||||
| 		 */ |  | ||||||
| 		ktime_t		swtstamp; |  | ||||||
| 	}; | 	}; | ||||||
| 	__u8		tcp_flags;	/* TCP header flags. (tcp[13])	*/ | 	__u8		tcp_flags;	/* TCP header flags. (tcp[13])	*/ | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -4266,11 +4266,6 @@ static void tcp_sack_remove(struct tcp_sock *tp) | ||||||
| 	tp->rx_opt.num_sacks = num_sacks; | 	tp->rx_opt.num_sacks = num_sacks; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| enum tcp_queue { |  | ||||||
| 	OOO_QUEUE, |  | ||||||
| 	RCV_QUEUE, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /**
 | /**
 | ||||||
|  * tcp_try_coalesce - try to merge skb to prior one |  * tcp_try_coalesce - try to merge skb to prior one | ||||||
|  * @sk: socket |  * @sk: socket | ||||||
|  | @ -4286,7 +4281,6 @@ enum tcp_queue { | ||||||
|  * Returns true if caller should free @from instead of queueing it |  * Returns true if caller should free @from instead of queueing it | ||||||
|  */ |  */ | ||||||
| static bool tcp_try_coalesce(struct sock *sk, | static bool tcp_try_coalesce(struct sock *sk, | ||||||
| 			     enum tcp_queue dest, |  | ||||||
| 			     struct sk_buff *to, | 			     struct sk_buff *to, | ||||||
| 			     struct sk_buff *from, | 			     struct sk_buff *from, | ||||||
| 			     bool *fragstolen) | 			     bool *fragstolen) | ||||||
|  | @ -4311,10 +4305,7 @@ static bool tcp_try_coalesce(struct sock *sk, | ||||||
| 
 | 
 | ||||||
| 	if (TCP_SKB_CB(from)->has_rxtstamp) { | 	if (TCP_SKB_CB(from)->has_rxtstamp) { | ||||||
| 		TCP_SKB_CB(to)->has_rxtstamp = true; | 		TCP_SKB_CB(to)->has_rxtstamp = true; | ||||||
| 		if (dest == OOO_QUEUE) | 		to->tstamp = from->tstamp; | ||||||
| 			TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp; |  | ||||||
| 		else |  | ||||||
| 			to->tstamp = from->tstamp; |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	return true; | 	return true; | ||||||
|  | @ -4351,9 +4342,6 @@ static void tcp_ofo_queue(struct sock *sk) | ||||||
| 		} | 		} | ||||||
| 		p = rb_next(p); | 		p = rb_next(p); | ||||||
| 		rb_erase(&skb->rbnode, &tp->out_of_order_queue); | 		rb_erase(&skb->rbnode, &tp->out_of_order_queue); | ||||||
| 		/* Replace tstamp which was stomped by rbnode */ |  | ||||||
| 		if (TCP_SKB_CB(skb)->has_rxtstamp) |  | ||||||
| 			skb->tstamp = TCP_SKB_CB(skb)->swtstamp; |  | ||||||
| 
 | 
 | ||||||
| 		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { | 		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { | ||||||
| 			SOCK_DEBUG(sk, "ofo packet was already received\n"); | 			SOCK_DEBUG(sk, "ofo packet was already received\n"); | ||||||
|  | @ -4365,8 +4353,7 @@ static void tcp_ofo_queue(struct sock *sk) | ||||||
| 			   TCP_SKB_CB(skb)->end_seq); | 			   TCP_SKB_CB(skb)->end_seq); | ||||||
| 
 | 
 | ||||||
| 		tail = skb_peek_tail(&sk->sk_receive_queue); | 		tail = skb_peek_tail(&sk->sk_receive_queue); | ||||||
| 		eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE, | 		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); | ||||||
| 						 tail, skb, &fragstolen); |  | ||||||
| 		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); | 		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); | ||||||
| 		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; | 		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; | ||||||
| 		if (!eaten) | 		if (!eaten) | ||||||
|  | @ -4420,10 +4407,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | ||||||
| 		return; | 		return; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	/* Stash tstamp to avoid being stomped on by rbnode */ |  | ||||||
| 	if (TCP_SKB_CB(skb)->has_rxtstamp) |  | ||||||
| 		TCP_SKB_CB(skb)->swtstamp = skb->tstamp; |  | ||||||
| 
 |  | ||||||
| 	/* Disable header prediction. */ | 	/* Disable header prediction. */ | ||||||
| 	tp->pred_flags = 0; | 	tp->pred_flags = 0; | ||||||
| 	inet_csk_schedule_ack(sk); | 	inet_csk_schedule_ack(sk); | ||||||
|  | @ -4451,7 +4434,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | ||||||
| 	/* In the typical case, we are adding an skb to the end of the list.
 | 	/* In the typical case, we are adding an skb to the end of the list.
 | ||||||
| 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. | 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb, | 	if (tcp_try_coalesce(sk, tp->ooo_last_skb, | ||||||
| 			     skb, &fragstolen)) { | 			     skb, &fragstolen)) { | ||||||
| coalesce_done: | coalesce_done: | ||||||
| 		tcp_grow_window(sk, skb); | 		tcp_grow_window(sk, skb); | ||||||
|  | @ -4502,7 +4485,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | ||||||
| 				__kfree_skb(skb1); | 				__kfree_skb(skb1); | ||||||
| 				goto merge_right; | 				goto merge_right; | ||||||
| 			} | 			} | ||||||
| 		} else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1, | 		} else if (tcp_try_coalesce(sk, skb1, | ||||||
| 					    skb, &fragstolen)) { | 					    skb, &fragstolen)) { | ||||||
| 			goto coalesce_done; | 			goto coalesce_done; | ||||||
| 		} | 		} | ||||||
|  | @ -4554,7 +4537,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int | ||||||
| 
 | 
 | ||||||
| 	__skb_pull(skb, hdrlen); | 	__skb_pull(skb, hdrlen); | ||||||
| 	eaten = (tail && | 	eaten = (tail && | ||||||
| 		 tcp_try_coalesce(sk, RCV_QUEUE, tail, | 		 tcp_try_coalesce(sk, tail, | ||||||
| 				  skb, fragstolen)) ? 1 : 0; | 				  skb, fragstolen)) ? 1 : 0; | ||||||
| 	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); | 	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); | ||||||
| 	if (!eaten) { | 	if (!eaten) { | ||||||
|  |  | ||||||
|  | @ -146,7 +146,6 @@ struct netem_sched_data { | ||||||
|  */ |  */ | ||||||
| struct netem_skb_cb { | struct netem_skb_cb { | ||||||
| 	psched_time_t	time_to_send; | 	psched_time_t	time_to_send; | ||||||
| 	ktime_t		tstamp_save; |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -561,7 +560,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		cb->time_to_send = now + delay; | 		cb->time_to_send = now + delay; | ||||||
| 		cb->tstamp_save = skb->tstamp; |  | ||||||
| 		++q->counter; | 		++q->counter; | ||||||
| 		tfifo_enqueue(skb, sch); | 		tfifo_enqueue(skb, sch); | ||||||
| 	} else { | 	} else { | ||||||
|  | @ -629,7 +627,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) | ||||||
| 			qdisc_qstats_backlog_dec(sch, skb); | 			qdisc_qstats_backlog_dec(sch, skb); | ||||||
| 			skb->next = NULL; | 			skb->next = NULL; | ||||||
| 			skb->prev = NULL; | 			skb->prev = NULL; | ||||||
| 			skb->tstamp = netem_skb_cb(skb)->tstamp_save; | 			/* skb->dev shares skb->rbnode area,
 | ||||||
|  | 			 * we need to restore its value. | ||||||
|  | 			 */ | ||||||
|  | 			skb->dev = qdisc_dev(sch); | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_NET_CLS_ACT | #ifdef CONFIG_NET_CLS_ACT | ||||||
| 			/*
 | 			/*
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Eric Dumazet
						Eric Dumazet