forked from mirrors/linux
		
	net: Convert NAPI gro list into a small hash table.
Improve the performance of GRO receive by splitting flows into multiple hash chains.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									d4546c2509
								
							
						
					
					
						commit
						07d78363dc
					
				
					 2 changed files with 81 additions and 27 deletions
				
			
		| 
						 | 
					@ -305,6 +305,7 @@ int __init netdev_boot_setup(char *str);
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Structure for NAPI scheduling similar to tasklet but with weighting
 | 
					 * Structure for NAPI scheduling similar to tasklet but with weighting
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					#define GRO_HASH_BUCKETS	8
 | 
				
			||||||
struct napi_struct {
 | 
					struct napi_struct {
 | 
				
			||||||
	/* The poll_list must only be managed by the entity which
 | 
						/* The poll_list must only be managed by the entity which
 | 
				
			||||||
	 * changes the state of the NAPI_STATE_SCHED bit.  This means
 | 
						 * changes the state of the NAPI_STATE_SCHED bit.  This means
 | 
				
			||||||
| 
						 | 
					@ -322,7 +323,7 @@ struct napi_struct {
 | 
				
			||||||
	int			poll_owner;
 | 
						int			poll_owner;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
	struct net_device	*dev;
 | 
						struct net_device	*dev;
 | 
				
			||||||
	struct list_head	gro_list;
 | 
						struct list_head	gro_hash[GRO_HASH_BUCKETS];
 | 
				
			||||||
	struct sk_buff		*skb;
 | 
						struct sk_buff		*skb;
 | 
				
			||||||
	struct hrtimer		timer;
 | 
						struct hrtimer		timer;
 | 
				
			||||||
	struct list_head	dev_list;
 | 
						struct list_head	dev_list;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										103
									
								
								net/core/dev.c
									
									
									
									
									
								
							
							
						
						
									
										103
									
								
								net/core/dev.c
									
									
									
									
									
								
							| 
						 | 
					@ -4875,15 +4875,12 @@ static int napi_gro_complete(struct sk_buff *skb)
 | 
				
			||||||
	return netif_receive_skb_internal(skb);
 | 
						return netif_receive_skb_internal(skb);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* napi->gro_list contains packets ordered by age.
 | 
					static void __napi_gro_flush_chain(struct napi_struct *napi, struct list_head *head,
 | 
				
			||||||
 * youngest packets at the head of it.
 | 
									   bool flush_old)
 | 
				
			||||||
 * Complete skbs in reverse order to reduce latencies.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct sk_buff *skb, *p;
 | 
						struct sk_buff *skb, *p;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	list_for_each_entry_safe_reverse(skb, p, &napi->gro_list, list) {
 | 
						list_for_each_entry_safe_reverse(skb, p, head, list) {
 | 
				
			||||||
		if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
 | 
							if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
 | 
				
			||||||
			return;
 | 
								return;
 | 
				
			||||||
		list_del_init(&skb->list);
 | 
							list_del_init(&skb->list);
 | 
				
			||||||
| 
						 | 
					@ -4891,15 +4888,33 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 | 
				
			||||||
		napi->gro_count--;
 | 
							napi->gro_count--;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* napi->gro_hash contains packets ordered by age.
 | 
				
			||||||
 | 
					 * youngest packets at the head of it.
 | 
				
			||||||
 | 
					 * Complete skbs in reverse order to reduce latencies.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (i = 0; i < GRO_HASH_BUCKETS; i++) {
 | 
				
			||||||
 | 
							struct list_head *head = &napi->gro_hash[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							__napi_gro_flush_chain(napi, head, flush_old);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL(napi_gro_flush);
 | 
					EXPORT_SYMBOL(napi_gro_flush);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 | 
					static struct list_head *gro_list_prepare(struct napi_struct *napi,
 | 
				
			||||||
 | 
										  struct sk_buff *skb)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned int maclen = skb->dev->hard_header_len;
 | 
						unsigned int maclen = skb->dev->hard_header_len;
 | 
				
			||||||
	u32 hash = skb_get_hash_raw(skb);
 | 
						u32 hash = skb_get_hash_raw(skb);
 | 
				
			||||||
 | 
						struct list_head *head;
 | 
				
			||||||
	struct sk_buff *p;
 | 
						struct sk_buff *p;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	list_for_each_entry(p, &napi->gro_list, list) {
 | 
						head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)];
 | 
				
			||||||
 | 
						list_for_each_entry(p, head, list) {
 | 
				
			||||||
		unsigned long diffs;
 | 
							unsigned long diffs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		NAPI_GRO_CB(p)->flush = 0;
 | 
							NAPI_GRO_CB(p)->flush = 0;
 | 
				
			||||||
| 
						 | 
					@ -4922,6 +4937,8 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 | 
				
			||||||
				       maclen);
 | 
									       maclen);
 | 
				
			||||||
		NAPI_GRO_CB(p)->same_flow = !diffs;
 | 
							NAPI_GRO_CB(p)->same_flow = !diffs;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return head;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void skb_gro_reset_offset(struct sk_buff *skb)
 | 
					static void skb_gro_reset_offset(struct sk_buff *skb)
 | 
				
			||||||
| 
						 | 
					@ -4964,11 +4981,45 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void gro_flush_oldest(struct napi_struct *napi)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct sk_buff *oldest = NULL;
 | 
				
			||||||
 | 
						unsigned long age = jiffies;
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (i = 0; i < GRO_HASH_BUCKETS; i++) {
 | 
				
			||||||
 | 
							struct list_head *head = &napi->gro_hash[i];
 | 
				
			||||||
 | 
							struct sk_buff *skb;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (list_empty(head))
 | 
				
			||||||
 | 
								continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							skb = list_last_entry(head, struct sk_buff, list);
 | 
				
			||||||
 | 
							if (!oldest || time_before(NAPI_GRO_CB(skb)->age, age)) {
 | 
				
			||||||
 | 
								oldest = skb;
 | 
				
			||||||
 | 
								age = NAPI_GRO_CB(skb)->age;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* We are called with napi->gro_count >= MAX_GRO_SKBS, so this is
 | 
				
			||||||
 | 
						 * impossible.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (WARN_ON_ONCE(!oldest))
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Do not adjust napi->gro_count, caller is adding a new SKB to
 | 
				
			||||||
 | 
						 * the chain.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						list_del(&oldest->list);
 | 
				
			||||||
 | 
						napi_gro_complete(oldest);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 | 
					static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct list_head *head = &offload_base;
 | 
						struct list_head *head = &offload_base;
 | 
				
			||||||
	struct packet_offload *ptype;
 | 
						struct packet_offload *ptype;
 | 
				
			||||||
	__be16 type = skb->protocol;
 | 
						__be16 type = skb->protocol;
 | 
				
			||||||
 | 
						struct list_head *gro_head;
 | 
				
			||||||
	struct sk_buff *pp = NULL;
 | 
						struct sk_buff *pp = NULL;
 | 
				
			||||||
	enum gro_result ret;
 | 
						enum gro_result ret;
 | 
				
			||||||
	int same_flow;
 | 
						int same_flow;
 | 
				
			||||||
| 
						 | 
					@ -4977,7 +5028,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 | 
				
			||||||
	if (netif_elide_gro(skb->dev))
 | 
						if (netif_elide_gro(skb->dev))
 | 
				
			||||||
		goto normal;
 | 
							goto normal;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	gro_list_prepare(napi, skb);
 | 
						gro_head = gro_list_prepare(napi, skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	rcu_read_lock();
 | 
						rcu_read_lock();
 | 
				
			||||||
	list_for_each_entry_rcu(ptype, head, list) {
 | 
						list_for_each_entry_rcu(ptype, head, list) {
 | 
				
			||||||
| 
						 | 
					@ -5011,7 +5062,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 | 
				
			||||||
			NAPI_GRO_CB(skb)->csum_valid = 0;
 | 
								NAPI_GRO_CB(skb)->csum_valid = 0;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
 | 
							pp = ptype->callbacks.gro_receive(gro_head, skb);
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	rcu_read_unlock();
 | 
						rcu_read_unlock();
 | 
				
			||||||
| 
						 | 
					@ -5040,11 +5091,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 | 
				
			||||||
		goto normal;
 | 
							goto normal;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
 | 
						if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
 | 
				
			||||||
		struct sk_buff *nskb;
 | 
							gro_flush_oldest(napi);
 | 
				
			||||||
 | 
					 | 
				
			||||||
		nskb = list_last_entry(&napi->gro_list, struct sk_buff, list);
 | 
					 | 
				
			||||||
		list_del(&nskb->list);
 | 
					 | 
				
			||||||
		napi_gro_complete(nskb);
 | 
					 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		napi->gro_count++;
 | 
							napi->gro_count++;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -5052,7 +5099,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 | 
				
			||||||
	NAPI_GRO_CB(skb)->age = jiffies;
 | 
						NAPI_GRO_CB(skb)->age = jiffies;
 | 
				
			||||||
	NAPI_GRO_CB(skb)->last = skb;
 | 
						NAPI_GRO_CB(skb)->last = skb;
 | 
				
			||||||
	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
 | 
						skb_shinfo(skb)->gso_size = skb_gro_len(skb);
 | 
				
			||||||
	list_add(&skb->list, &napi->gro_list);
 | 
						list_add(&skb->list, gro_head);
 | 
				
			||||||
	ret = GRO_HELD;
 | 
						ret = GRO_HELD;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pull:
 | 
					pull:
 | 
				
			||||||
| 
						 | 
					@ -5458,7 +5505,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 | 
				
			||||||
				 NAPIF_STATE_IN_BUSY_POLL)))
 | 
									 NAPIF_STATE_IN_BUSY_POLL)))
 | 
				
			||||||
		return false;
 | 
							return false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!list_empty(&n->gro_list)) {
 | 
						if (n->gro_count) {
 | 
				
			||||||
		unsigned long timeout = 0;
 | 
							unsigned long timeout = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (work_done)
 | 
							if (work_done)
 | 
				
			||||||
| 
						 | 
					@ -5667,7 +5714,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 | 
				
			||||||
	/* Note : we use a relaxed variant of napi_schedule_prep() not setting
 | 
						/* Note : we use a relaxed variant of napi_schedule_prep() not setting
 | 
				
			||||||
	 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
 | 
						 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (!list_empty(&napi->gro_list) && !napi_disable_pending(napi) &&
 | 
						if (napi->gro_count && !napi_disable_pending(napi) &&
 | 
				
			||||||
	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
 | 
						    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
 | 
				
			||||||
		__napi_schedule_irqoff(napi);
 | 
							__napi_schedule_irqoff(napi);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5677,11 +5724,14 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 | 
				
			||||||
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 | 
					void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 | 
				
			||||||
		    int (*poll)(struct napi_struct *, int), int weight)
 | 
							    int (*poll)(struct napi_struct *, int), int weight)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	INIT_LIST_HEAD(&napi->poll_list);
 | 
						INIT_LIST_HEAD(&napi->poll_list);
 | 
				
			||||||
	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 | 
						hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 | 
				
			||||||
	napi->timer.function = napi_watchdog;
 | 
						napi->timer.function = napi_watchdog;
 | 
				
			||||||
	napi->gro_count = 0;
 | 
						napi->gro_count = 0;
 | 
				
			||||||
	INIT_LIST_HEAD(&napi->gro_list);
 | 
						for (i = 0; i < GRO_HASH_BUCKETS; i++)
 | 
				
			||||||
 | 
							INIT_LIST_HEAD(&napi->gro_hash[i]);
 | 
				
			||||||
	napi->skb = NULL;
 | 
						napi->skb = NULL;
 | 
				
			||||||
	napi->poll = poll;
 | 
						napi->poll = poll;
 | 
				
			||||||
	if (weight > NAPI_POLL_WEIGHT)
 | 
						if (weight > NAPI_POLL_WEIGHT)
 | 
				
			||||||
| 
						 | 
					@ -5714,13 +5764,17 @@ void napi_disable(struct napi_struct *n)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL(napi_disable);
 | 
					EXPORT_SYMBOL(napi_disable);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void gro_list_free(struct list_head *head)
 | 
					static void flush_gro_hash(struct napi_struct *napi)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct sk_buff *skb, *p;
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	list_for_each_entry_safe(skb, p, head, list)
 | 
						for (i = 0; i < GRO_HASH_BUCKETS; i++) {
 | 
				
			||||||
 | 
							struct sk_buff *skb, *n;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							list_for_each_entry_safe(skb, n, &napi->gro_hash[i], list)
 | 
				
			||||||
			kfree_skb(skb);
 | 
								kfree_skb(skb);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Must be called in process context */
 | 
					/* Must be called in process context */
 | 
				
			||||||
void netif_napi_del(struct napi_struct *napi)
 | 
					void netif_napi_del(struct napi_struct *napi)
 | 
				
			||||||
| 
						 | 
					@ -5731,8 +5785,7 @@ void netif_napi_del(struct napi_struct *napi)
 | 
				
			||||||
	list_del_init(&napi->dev_list);
 | 
						list_del_init(&napi->dev_list);
 | 
				
			||||||
	napi_free_frags(napi);
 | 
						napi_free_frags(napi);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	gro_list_free(&napi->gro_list);
 | 
						flush_gro_hash(napi);
 | 
				
			||||||
	INIT_LIST_HEAD(&napi->gro_list);
 | 
					 | 
				
			||||||
	napi->gro_count = 0;
 | 
						napi->gro_count = 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL(netif_napi_del);
 | 
					EXPORT_SYMBOL(netif_napi_del);
 | 
				
			||||||
| 
						 | 
					@ -5775,7 +5828,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 | 
				
			||||||
		goto out_unlock;
 | 
							goto out_unlock;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!list_empty(&n->gro_list)) {
 | 
						if (n->gro_count) {
 | 
				
			||||||
		/* flush too old packets
 | 
							/* flush too old packets
 | 
				
			||||||
		 * If HZ < 1000, flush all packets.
 | 
							 * If HZ < 1000, flush all packets.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue