mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	net: use listified RX for handling GRO_NORMAL skbs
When GRO decides not to coalesce a packet, in napi_frags_finish(), instead of passing it to the stack immediately, place it on a list in the napi struct. Then, at flush time (napi_complete_done(), napi_poll(), or napi_busy_loop()), call netif_receive_skb_list_internal() on the list. We'd like to do that in napi_gro_flush(), but it's not called if !napi->gro_bitmask, so we have to do it in the callers instead. (There are a handful of drivers that call napi_gro_flush() themselves, but it's not clear why, or whether this will affect them.) Because a full 64 packets is an inefficiently large batch, also consume the list whenever it exceeds gro_normal_batch, a new net/core sysctl that defaults to 8. Signed-off-by: Edward Cree <ecree@solarflare.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									6727013694
								
							
						
					
					
						commit
						323ebb61e3
					
				
					 3 changed files with 52 additions and 3 deletions
				
			
		| 
						 | 
					@ -332,6 +332,8 @@ struct napi_struct {
 | 
				
			||||||
	struct net_device	*dev;
 | 
						struct net_device	*dev;
 | 
				
			||||||
	struct gro_list		gro_hash[GRO_HASH_BUCKETS];
 | 
						struct gro_list		gro_hash[GRO_HASH_BUCKETS];
 | 
				
			||||||
	struct sk_buff		*skb;
 | 
						struct sk_buff		*skb;
 | 
				
			||||||
 | 
						struct list_head	rx_list; /* Pending GRO_NORMAL skbs */
 | 
				
			||||||
 | 
						int			rx_count; /* length of rx_list */
 | 
				
			||||||
	struct hrtimer		timer;
 | 
						struct hrtimer		timer;
 | 
				
			||||||
	struct list_head	dev_list;
 | 
						struct list_head	dev_list;
 | 
				
			||||||
	struct hlist_node	napi_hash_node;
 | 
						struct hlist_node	napi_hash_node;
 | 
				
			||||||
| 
						 | 
					@ -4239,6 +4241,7 @@ extern int		dev_weight_rx_bias;
 | 
				
			||||||
extern int		dev_weight_tx_bias;
 | 
					extern int		dev_weight_tx_bias;
 | 
				
			||||||
extern int		dev_rx_weight;
 | 
					extern int		dev_rx_weight;
 | 
				
			||||||
extern int		dev_tx_weight;
 | 
					extern int		dev_tx_weight;
 | 
				
			||||||
 | 
					extern int		gro_normal_batch;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
 | 
					bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
 | 
				
			||||||
struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
 | 
					struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3963,6 +3963,8 @@ int dev_weight_rx_bias __read_mostly = 1;  /* bias for backlog weight */
 | 
				
			||||||
int dev_weight_tx_bias __read_mostly = 1;  /* bias for output_queue quota */
 | 
					int dev_weight_tx_bias __read_mostly = 1;  /* bias for output_queue quota */
 | 
				
			||||||
int dev_rx_weight __read_mostly = 64;
 | 
					int dev_rx_weight __read_mostly = 64;
 | 
				
			||||||
int dev_tx_weight __read_mostly = 64;
 | 
					int dev_tx_weight __read_mostly = 64;
 | 
				
			||||||
 | 
					/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */
 | 
				
			||||||
 | 
					int gro_normal_batch __read_mostly = 8;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Called with irq disabled */
 | 
					/* Called with irq disabled */
 | 
				
			||||||
static inline void ____napi_schedule(struct softnet_data *sd,
 | 
					static inline void ____napi_schedule(struct softnet_data *sd,
 | 
				
			||||||
| 
						 | 
					@ -5747,6 +5749,26 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL(napi_get_frags);
 | 
					EXPORT_SYMBOL(napi_get_frags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
 | 
				
			||||||
 | 
					static void gro_normal_list(struct napi_struct *napi)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (!napi->rx_count)
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						netif_receive_skb_list_internal(&napi->rx_list);
 | 
				
			||||||
 | 
						INIT_LIST_HEAD(&napi->rx_list);
 | 
				
			||||||
 | 
						napi->rx_count = 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Queue one GRO_NORMAL SKB up for list processing.  If batch size exceeded,
 | 
				
			||||||
 | 
					 * pass the whole batch up to the stack.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						list_add_tail(&skb->list, &napi->rx_list);
 | 
				
			||||||
 | 
						if (++napi->rx_count >= gro_normal_batch)
 | 
				
			||||||
 | 
							gro_normal_list(napi);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static gro_result_t napi_frags_finish(struct napi_struct *napi,
 | 
					static gro_result_t napi_frags_finish(struct napi_struct *napi,
 | 
				
			||||||
				      struct sk_buff *skb,
 | 
									      struct sk_buff *skb,
 | 
				
			||||||
				      gro_result_t ret)
 | 
									      gro_result_t ret)
 | 
				
			||||||
| 
						 | 
					@ -5756,8 +5778,8 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
 | 
				
			||||||
	case GRO_HELD:
 | 
						case GRO_HELD:
 | 
				
			||||||
		__skb_push(skb, ETH_HLEN);
 | 
							__skb_push(skb, ETH_HLEN);
 | 
				
			||||||
		skb->protocol = eth_type_trans(skb, skb->dev);
 | 
							skb->protocol = eth_type_trans(skb, skb->dev);
 | 
				
			||||||
		if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
 | 
							if (ret == GRO_NORMAL)
 | 
				
			||||||
			ret = GRO_DROP;
 | 
								gro_normal_one(napi, skb);
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	case GRO_DROP:
 | 
						case GRO_DROP:
 | 
				
			||||||
| 
						 | 
					@ -6034,6 +6056,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 | 
				
			||||||
				 NAPIF_STATE_IN_BUSY_POLL)))
 | 
									 NAPIF_STATE_IN_BUSY_POLL)))
 | 
				
			||||||
		return false;
 | 
							return false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						gro_normal_list(n);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (n->gro_bitmask) {
 | 
						if (n->gro_bitmask) {
 | 
				
			||||||
		unsigned long timeout = 0;
 | 
							unsigned long timeout = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6119,10 +6143,19 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
 | 
				
			||||||
	 * Ideally, a new ndo_busy_poll_stop() could avoid another round.
 | 
						 * Ideally, a new ndo_busy_poll_stop() could avoid another round.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	rc = napi->poll(napi, BUSY_POLL_BUDGET);
 | 
						rc = napi->poll(napi, BUSY_POLL_BUDGET);
 | 
				
			||||||
 | 
						/* We can't gro_normal_list() here, because napi->poll() might have
 | 
				
			||||||
 | 
						 * rearmed the napi (napi_complete_done()) in which case it could
 | 
				
			||||||
 | 
						 * already be running on another CPU.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
	trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
 | 
						trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
 | 
				
			||||||
	netpoll_poll_unlock(have_poll_lock);
 | 
						netpoll_poll_unlock(have_poll_lock);
 | 
				
			||||||
	if (rc == BUSY_POLL_BUDGET)
 | 
						if (rc == BUSY_POLL_BUDGET) {
 | 
				
			||||||
 | 
							/* As the whole budget was spent, we still own the napi so can
 | 
				
			||||||
 | 
							 * safely handle the rx_list.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							gro_normal_list(napi);
 | 
				
			||||||
		__napi_schedule(napi);
 | 
							__napi_schedule(napi);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	local_bh_enable();
 | 
						local_bh_enable();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6167,6 +6200,7 @@ void napi_busy_loop(unsigned int napi_id,
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		work = napi_poll(napi, BUSY_POLL_BUDGET);
 | 
							work = napi_poll(napi, BUSY_POLL_BUDGET);
 | 
				
			||||||
		trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
 | 
							trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
 | 
				
			||||||
 | 
							gro_normal_list(napi);
 | 
				
			||||||
count:
 | 
					count:
 | 
				
			||||||
		if (work > 0)
 | 
							if (work > 0)
 | 
				
			||||||
			__NET_ADD_STATS(dev_net(napi->dev),
 | 
								__NET_ADD_STATS(dev_net(napi->dev),
 | 
				
			||||||
| 
						 | 
					@ -6272,6 +6306,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 | 
				
			||||||
	napi->timer.function = napi_watchdog;
 | 
						napi->timer.function = napi_watchdog;
 | 
				
			||||||
	init_gro_hash(napi);
 | 
						init_gro_hash(napi);
 | 
				
			||||||
	napi->skb = NULL;
 | 
						napi->skb = NULL;
 | 
				
			||||||
 | 
						INIT_LIST_HEAD(&napi->rx_list);
 | 
				
			||||||
 | 
						napi->rx_count = 0;
 | 
				
			||||||
	napi->poll = poll;
 | 
						napi->poll = poll;
 | 
				
			||||||
	if (weight > NAPI_POLL_WEIGHT)
 | 
						if (weight > NAPI_POLL_WEIGHT)
 | 
				
			||||||
		netdev_err_once(dev, "%s() called with weight %d\n", __func__,
 | 
							netdev_err_once(dev, "%s() called with weight %d\n", __func__,
 | 
				
			||||||
| 
						 | 
					@ -6368,6 +6404,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 | 
				
			||||||
		goto out_unlock;
 | 
							goto out_unlock;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						gro_normal_list(n);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (n->gro_bitmask) {
 | 
						if (n->gro_bitmask) {
 | 
				
			||||||
		/* flush too old packets
 | 
							/* flush too old packets
 | 
				
			||||||
		 * If HZ < 1000, flush all packets.
 | 
							 * If HZ < 1000, flush all packets.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -567,6 +567,14 @@ static struct ctl_table net_core_table[] = {
 | 
				
			||||||
		.mode		= 0644,
 | 
							.mode		= 0644,
 | 
				
			||||||
		.proc_handler	= proc_do_static_key,
 | 
							.proc_handler	= proc_do_static_key,
 | 
				
			||||||
	},
 | 
						},
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							.procname	= "gro_normal_batch",
 | 
				
			||||||
 | 
							.data		= &gro_normal_batch,
 | 
				
			||||||
 | 
							.maxlen		= sizeof(unsigned int),
 | 
				
			||||||
 | 
							.mode		= 0644,
 | 
				
			||||||
 | 
							.proc_handler	= proc_dointvec_minmax,
 | 
				
			||||||
 | 
							.extra1		= SYSCTL_ONE,
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
	{ }
 | 
						{ }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue