forked from mirrors/linux
		
	net: implement threaded-able napi poll loop support
This patch allows running each napi poll loop inside its own kernel thread. The kthread is created during netif_napi_add() if dev->threaded is set, and threaded mode is enabled in napi_enable(). We will provide a way to set dev->threaded and enable threaded mode without a device up/down in the following patch. Once threaded mode is enabled and the kthread is started, napi_schedule() will wake up that thread instead of scheduling the softirq. The threaded poll loop behaves much like net_rx_action, but it does not have to manipulate local irqs and uses an explicit scheduling point based on netdev_budget. Co-developed-by: Paolo Abeni <pabeni@redhat.com> Signed-off-by: Paolo Abeni <pabeni@redhat.com> Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Co-developed-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Wei Wang <weiwan@google.com> Reviewed-by: Alexander Duyck <alexanderduyck@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									898f8015ff
								
							
						
					
					
						commit
						29863d41bb
					
				
					 2 changed files with 119 additions and 14 deletions
				
			
		|  | @ -347,6 +347,7 @@ struct napi_struct { | ||||||
| 	struct list_head	dev_list; | 	struct list_head	dev_list; | ||||||
| 	struct hlist_node	napi_hash_node; | 	struct hlist_node	napi_hash_node; | ||||||
| 	unsigned int		napi_id; | 	unsigned int		napi_id; | ||||||
|  | 	struct task_struct	*thread; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| enum { | enum { | ||||||
|  | @ -358,6 +359,7 @@ enum { | ||||||
| 	NAPI_STATE_NO_BUSY_POLL,	/* Do not add in napi_hash, no busy polling */ | 	NAPI_STATE_NO_BUSY_POLL,	/* Do not add in napi_hash, no busy polling */ | ||||||
| 	NAPI_STATE_IN_BUSY_POLL,	/* sk_busy_loop() owns this NAPI */ | 	NAPI_STATE_IN_BUSY_POLL,	/* sk_busy_loop() owns this NAPI */ | ||||||
| 	NAPI_STATE_PREFER_BUSY_POLL,	/* prefer busy-polling over softirq processing*/ | 	NAPI_STATE_PREFER_BUSY_POLL,	/* prefer busy-polling over softirq processing*/ | ||||||
|  | 	NAPI_STATE_THREADED,		/* The poll is performed inside its own thread*/ | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| enum { | enum { | ||||||
|  | @ -369,6 +371,7 @@ enum { | ||||||
| 	NAPIF_STATE_NO_BUSY_POLL	= BIT(NAPI_STATE_NO_BUSY_POLL), | 	NAPIF_STATE_NO_BUSY_POLL	= BIT(NAPI_STATE_NO_BUSY_POLL), | ||||||
| 	NAPIF_STATE_IN_BUSY_POLL	= BIT(NAPI_STATE_IN_BUSY_POLL), | 	NAPIF_STATE_IN_BUSY_POLL	= BIT(NAPI_STATE_IN_BUSY_POLL), | ||||||
| 	NAPIF_STATE_PREFER_BUSY_POLL	= BIT(NAPI_STATE_PREFER_BUSY_POLL), | 	NAPIF_STATE_PREFER_BUSY_POLL	= BIT(NAPI_STATE_PREFER_BUSY_POLL), | ||||||
|  | 	NAPIF_STATE_THREADED		= BIT(NAPI_STATE_THREADED), | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| enum gro_result { | enum gro_result { | ||||||
|  | @ -503,20 +506,7 @@ static inline bool napi_complete(struct napi_struct *n) | ||||||
|  */ |  */ | ||||||
| void napi_disable(struct napi_struct *n); | void napi_disable(struct napi_struct *n); | ||||||
| 
 | 
 | ||||||
| /**
 | void napi_enable(struct napi_struct *n); | ||||||
|  *	napi_enable - enable NAPI scheduling |  | ||||||
|  *	@n: NAPI context |  | ||||||
|  * |  | ||||||
|  * Resume NAPI from being scheduled on this context. |  | ||||||
|  * Must be paired with napi_disable. |  | ||||||
|  */ |  | ||||||
| static inline void napi_enable(struct napi_struct *n) |  | ||||||
| { |  | ||||||
| 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); |  | ||||||
| 	smp_mb__before_atomic(); |  | ||||||
| 	clear_bit(NAPI_STATE_SCHED, &n->state); |  | ||||||
| 	clear_bit(NAPI_STATE_NPSVC, &n->state); |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  *	napi_synchronize - wait until NAPI is not running |  *	napi_synchronize - wait until NAPI is not running | ||||||
|  | @ -1827,6 +1817,8 @@ enum netdev_priv_flags { | ||||||
|  * |  * | ||||||
|  *	@wol_enabled:	Wake-on-LAN is enabled |  *	@wol_enabled:	Wake-on-LAN is enabled | ||||||
|  * |  * | ||||||
|  |  *	@threaded:	napi threaded mode is enabled | ||||||
|  |  * | ||||||
|  *	@net_notifier_list:	List of per-net netdev notifier block |  *	@net_notifier_list:	List of per-net netdev notifier block | ||||||
|  *				that follow this device when it is moved |  *				that follow this device when it is moved | ||||||
|  *				to another network namespace. |  *				to another network namespace. | ||||||
|  | @ -2145,6 +2137,7 @@ struct net_device { | ||||||
| 	struct lock_class_key	*qdisc_running_key; | 	struct lock_class_key	*qdisc_running_key; | ||||||
| 	bool			proto_down; | 	bool			proto_down; | ||||||
| 	unsigned		wol_enabled:1; | 	unsigned		wol_enabled:1; | ||||||
|  | 	unsigned		threaded:1; | ||||||
| 
 | 
 | ||||||
| 	struct list_head	net_notifier_list; | 	struct list_head	net_notifier_list; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
							
								
								
									
										112
									
								
								net/core/dev.c
									
									
									
									
									
								
							
							
						
						
									
										112
									
								
								net/core/dev.c
									
									
									
									
									
								
							|  | @ -91,6 +91,7 @@ | ||||||
| #include <linux/etherdevice.h> | #include <linux/etherdevice.h> | ||||||
| #include <linux/ethtool.h> | #include <linux/ethtool.h> | ||||||
| #include <linux/skbuff.h> | #include <linux/skbuff.h> | ||||||
|  | #include <linux/kthread.h> | ||||||
| #include <linux/bpf.h> | #include <linux/bpf.h> | ||||||
| #include <linux/bpf_trace.h> | #include <linux/bpf_trace.h> | ||||||
| #include <net/net_namespace.h> | #include <net/net_namespace.h> | ||||||
|  | @ -1494,6 +1495,27 @@ void netdev_notify_peers(struct net_device *dev) | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(netdev_notify_peers); | EXPORT_SYMBOL(netdev_notify_peers); | ||||||
| 
 | 
 | ||||||
|  | static int napi_threaded_poll(void *data); | ||||||
|  | 
 | ||||||
|  | static int napi_kthread_create(struct napi_struct *n) | ||||||
|  | { | ||||||
|  | 	int err = 0; | ||||||
|  | 
 | ||||||
|  | 	/* Create and wake up the kthread once to put it in
 | ||||||
|  | 	 * TASK_INTERRUPTIBLE mode to avoid the blocked task | ||||||
|  | 	 * warning and work with loadavg. | ||||||
|  | 	 */ | ||||||
|  | 	n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d", | ||||||
|  | 				n->dev->name, n->napi_id); | ||||||
|  | 	if (IS_ERR(n->thread)) { | ||||||
|  | 		err = PTR_ERR(n->thread); | ||||||
|  | 		pr_err("kthread_run failed with err %d\n", err); | ||||||
|  | 		n->thread = NULL; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return err; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) | static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) | ||||||
| { | { | ||||||
| 	const struct net_device_ops *ops = dev->netdev_ops; | 	const struct net_device_ops *ops = dev->netdev_ops; | ||||||
|  | @ -4265,6 +4287,21 @@ int gro_normal_batch __read_mostly = 8; | ||||||
/* Queue @napi for polling.
 *
 * In threaded mode (NAPI_STATE_THREADED set and the kthread created)
 * the per-napi kthread is woken instead of raising the softirq;
 * otherwise @napi is appended to this CPU's softnet poll_list and
 * NET_RX_SOFTIRQ is raised.
 * NOTE(review): __raise_softirq_irqoff() suggests callers run with
 * hard irqs disabled — confirm against call sites outside this view.
 */
static inline void ____napi_schedule(struct softnet_data *sd,
				     struct napi_struct *napi)
{
	struct task_struct *thread;

	if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
		/* Paired with smp_mb__before_atomic() in
		 * napi_enable(). Use READ_ONCE() to guarantee
		 * a complete read on napi->thread. Only call
		 * wake_up_process() when it's not NULL.
		 */
		thread = READ_ONCE(napi->thread);
		if (thread) {
			wake_up_process(thread);
			return;
		}
	}

	/* Softirq fallback: also taken when kthread creation failed. */
	list_add_tail(&napi->poll_list, &sd->poll_list);
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
|  | @ -6728,6 +6765,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, | ||||||
| 	set_bit(NAPI_STATE_NPSVC, &napi->state); | 	set_bit(NAPI_STATE_NPSVC, &napi->state); | ||||||
| 	list_add_rcu(&napi->dev_list, &dev->napi_list); | 	list_add_rcu(&napi->dev_list, &dev->napi_list); | ||||||
| 	napi_hash_add(napi); | 	napi_hash_add(napi); | ||||||
|  | 	/* Create kthread for this napi if dev->threaded is set.
 | ||||||
|  | 	 * Clear dev->threaded if kthread creation failed so that | ||||||
|  | 	 * threaded mode will not be enabled in napi_enable(). | ||||||
|  | 	 */ | ||||||
|  | 	if (dev->threaded && napi_kthread_create(napi)) | ||||||
|  | 		dev->threaded = 0; | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(netif_napi_add); | EXPORT_SYMBOL(netif_napi_add); | ||||||
| 
 | 
 | ||||||
|  | @ -6745,9 +6788,28 @@ void napi_disable(struct napi_struct *n) | ||||||
| 
 | 
 | ||||||
| 	clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); | 	clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); | ||||||
| 	clear_bit(NAPI_STATE_DISABLE, &n->state); | 	clear_bit(NAPI_STATE_DISABLE, &n->state); | ||||||
|  | 	clear_bit(NAPI_STATE_THREADED, &n->state); | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(napi_disable); | EXPORT_SYMBOL(napi_disable); | ||||||
| 
 | 
 | ||||||
/**
 *	napi_enable - enable NAPI scheduling
 *	@n: NAPI context
 *
 * Resume NAPI from being scheduled on this context.
 * Must be paired with napi_disable.
 */
void napi_enable(struct napi_struct *n)
{
	/* napi_disable() leaves SCHED set; enabling twice is a bug. */
	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
	/* Paired with the READ_ONCE() of n->thread in ____napi_schedule():
	 * order prior stores (including n->thread) before the bit clears
	 * that make this napi schedulable again.
	 */
	smp_mb__before_atomic();
	clear_bit(NAPI_STATE_SCHED, &n->state);
	clear_bit(NAPI_STATE_NPSVC, &n->state);
	/* Arm threaded mode only if the device asked for it AND the
	 * kthread was actually created (see netif_napi_add()).
	 */
	if (n->dev->threaded && n->thread)
		set_bit(NAPI_STATE_THREADED, &n->state);
}
EXPORT_SYMBOL(napi_enable);
|  | 
 | ||||||
| static void flush_gro_hash(struct napi_struct *napi) | static void flush_gro_hash(struct napi_struct *napi) | ||||||
| { | { | ||||||
| 	int i; | 	int i; | ||||||
|  | @ -6773,6 +6835,11 @@ void __netif_napi_del(struct napi_struct *napi) | ||||||
| 
 | 
 | ||||||
| 	flush_gro_hash(napi); | 	flush_gro_hash(napi); | ||||||
| 	napi->gro_bitmask = 0; | 	napi->gro_bitmask = 0; | ||||||
|  | 
 | ||||||
|  | 	if (napi->thread) { | ||||||
|  | 		kthread_stop(napi->thread); | ||||||
|  | 		napi->thread = NULL; | ||||||
|  | 	} | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(__netif_napi_del); | EXPORT_SYMBOL(__netif_napi_del); | ||||||
| 
 | 
 | ||||||
|  | @ -6867,6 +6934,51 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) | ||||||
| 	return work; | 	return work; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
/* Sleep until this napi is scheduled, the kthread is told to stop, or
 * a napi_disable() is pending.
 * Returns 0 when the thread should run a poll cycle, -1 when the
 * caller's loop should terminate (stop/disable).
 * NOTE(review): NAPI_STATE_SCHED can also be owned by a busy-poll user
 * or napi_disable(); testing it alone to claim ownership looks racy —
 * confirm against the later SCHED_THREADED follow-up.
 */
static int napi_thread_wait(struct napi_struct *napi)
{
	/* Set state before testing the condition so a concurrent
	 * wake_up_process() from ____napi_schedule() is not lost.
	 */
	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop() && !napi_disable_pending(napi)) {
		if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
			/* A threaded napi must never also sit on a
			 * softnet poll_list.
			 */
			WARN_ON(!list_empty(&napi->poll_list));
			__set_current_state(TASK_RUNNING);
			return 0;
		}

		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return -1;
}
|  | 
 | ||||||
/* Per-napi kthread main loop: wait until scheduled, then poll the napi
 * under BH-disabled sections (mirroring net_rx_action()) until no
 * repoll is requested, with an explicit scheduling point per round.
 */
static int napi_threaded_poll(void *data)
{
	struct napi_struct *napi = data;
	void *have;	/* netpoll lock cookie */

	while (!napi_thread_wait(napi)) {
		for (;;) {
			bool repoll = false;

			/* One poll round runs with BHs off, like the
			 * softirq path this loop replaces.
			 */
			local_bh_disable();

			have = netpoll_poll_lock(napi);
			__napi_poll(napi, &repoll);
			netpoll_poll_unlock(have);

			/* Flush the deferred skb free list before BHs
			 * are re-enabled.
			 */
			__kfree_skb_flush();
			local_bh_enable();

			if (!repoll)
				break;

			/* Explicit scheduling point between rounds. */
			cond_resched();
		}
	}
	return 0;
}
|  | 
 | ||||||
| static __latent_entropy void net_rx_action(struct softirq_action *h) | static __latent_entropy void net_rx_action(struct softirq_action *h) | ||||||
| { | { | ||||||
| 	struct softnet_data *sd = this_cpu_ptr(&softnet_data); | 	struct softnet_data *sd = this_cpu_ptr(&softnet_data); | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Wei Wang
						Wei Wang