	net: sched: fix tx action rescheduling issue during deactivation
Currently qdisc_run() checks the STATE_DEACTIVATED of a lockless
qdisc before calling __qdisc_run(), which ultimately clears
STATE_MISSED once all the skbs have been dequeued. If STATE_DEACTIVATED
is set before STATE_MISSED is cleared, net_tx_action() may be
rescheduled at the end of qdisc_run_end(), see below:
CPU0(net_tx_action)  CPU1(__dev_xmit_skb)  CPU2(dev_deactivate)
          .                   .                     .
          .            set STATE_MISSED             .
          .           __netif_schedule()            .
          .                   .           set STATE_DEACTIVATED
          .                   .                qdisc_reset()
          .                   .                     .
          .<---------------   .              synchronize_net()
clear __QDISC_STATE_SCHED  |  .                     .
          .                |  .                     .
          .                |  .            some_qdisc_is_busy()
          .                |  .               return *false*
          .                |  .                     .
  test STATE_DEACTIVATED   |  .                     .
__qdisc_run() *not* called |  .                     .
          .                |  .                     .
   test STATE_MISSED       |  .                     .
 __netif_schedule()--------|  .                     .
          .                   .                     .
          .                   .                     .
__qdisc_run() is not called by net_tx_action() on CPU0 because
CPU2 has set the STATE_DEACTIVATED flag during dev_deactivate().
Since STATE_MISSED is only cleared in __qdisc_run(), it is still
set, so __netif_schedule() is called at the end of qdisc_run_end(),
causing the tx action rescheduling problem.
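For reference, the rescheduling comes from the lockless branch of
qdisc_run_end() introduced by the parent commit a90c57f2ce, which
retests STATE_MISSED after releasing the seqlock. A simplified
sketch (the locked path and other details are omitted here):

static inline void qdisc_run_end(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_NOLOCK) {
		spin_unlock(&qdisc->seqlock);

		/* Some other CPU failed qdisc_run_begin() while we
		 * held the seqlock: reschedule so its skbs get
		 * dequeued. With STATE_DEACTIVATED already set, this
		 * is the unnecessary rescheduling described above.
		 */
		if (unlikely(test_bit(__QDISC_STATE_MISSED,
				      &qdisc->state)))
			__netif_schedule(qdisc);
	}
}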
qdisc_run() called by net_tx_action() runs in softirq context,
which should have the same semantics as the qdisc_run() called by
__dev_xmit_skb() under rcu_read_lock_bh(). And since there is a
synchronize_net() between the STATE_DEACTIVATED flag being set and
qdisc_reset()/some_qdisc_is_busy() in dev_deactivate(), we can
safely bail out for a deactivated lockless qdisc in net_tx_action(),
and qdisc_reset() will reset all skbs not yet dequeued.
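The ordering in dev_deactivate_many() that makes this bailout safe
is, in abbreviated form (sketch only; watchdog, ingress queue and
refcount handling omitted):

void dev_deactivate_many(struct list_head *head)
{
	struct net_device *dev;

	/* 1. Point tx queues at noop_qdisc; qdisc_deactivate() sets
	 *    __QDISC_STATE_DEACTIVATED on the old qdisc.
	 */
	list_for_each_entry(dev, head, close_list)
		netdev_for_each_tx_queue(dev, dev_deactivate_queue,
					 &noop_qdisc);

	/* 2. Wait for in-flight rcu_read_lock(_bh) critical sections,
	 *    i.e. any qdisc_run() that may not yet have seen
	 *    STATE_DEACTIVATED.
	 */
	synchronize_net();

	/* 3. Free the skbs still queued; nothing is left to dequeue. */
	list_for_each_entry(dev, head, close_list)
		netdev_for_each_tx_queue(dev, dev_reset_queue, NULL);

	/* 4. Wait until no CPU is still inside qdisc_run(). */
	list_for_each_entry(dev, head, close_list)
		while (some_qdisc_is_busy(dev))
			schedule_timeout_uninterruptible(1);
}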
So add rcu_read_lock() explicitly to protect qdisc_run() and do
the STATE_DEACTIVATED check in net_tx_action() before calling
qdisc_run_begin(). Another option is to do the check in
qdisc_run_end(), but that would add unnecessary overhead for the
non-tx_action case, because after synchronize_net()
__dev_queue_xmit() will not see a qdisc with STATE_DEACTIVATED;
such a qdisc can only be seen by net_tx_action() because of
__netif_schedule().
The STATE_DEACTIVATED check in qdisc_run() was there to avoid the
race between net_tx_action() and qdisc_reset(), see commit
d518d2ed86 ("net/sched: fix race between deactivation and dequeue
for NOLOCK qdisc"). As the bailout added above for a deactivated
lockless qdisc in net_tx_action() protects against that race
without calling qdisc_run() at all, remove the STATE_DEACTIVATED
check from qdisc_run().
After qdisc_reset(), there are no skbs left in the qdisc to
dequeue, so clear STATE_MISSED in dev_reset_queue() too.
Fixes: 6b3ba9146f ("net: sched: allow qdiscs to handle locking")
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
V8: Clearing STATE_MISSED before calling __netif_schedule() has
    avoided the endless rescheduling problem, but there may still
    be an unnecessary rescheduling, so adjust the commit log.
Signed-off-by: David S. Miller <davem@davemloft.net>
			
			
parent a90c57f2ce
commit 102b55ee92

3 changed files with 26 additions and 11 deletions
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
@@ -128,12 +128,7 @@ void __qdisc_run(struct Qdisc *q);
 static inline void qdisc_run(struct Qdisc *q)
 {
 	if (qdisc_run_begin(q)) {
-		/* NOLOCK qdisc must check 'state' under the qdisc seqlock
-		 * to avoid racing with dev_qdisc_reset()
-		 */
-		if (!(q->flags & TCQ_F_NOLOCK) ||
-		    likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
-			__qdisc_run(q);
+		__qdisc_run(q);
 		qdisc_run_end(q);
 	}
 }
diff --git a/net/core/dev.c b/net/core/dev.c
@@ -5025,25 +5025,43 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
 		sd->output_queue_tailp = &sd->output_queue;
 		local_irq_enable();
 
+		rcu_read_lock();
+
 		while (head) {
 			struct Qdisc *q = head;
 			spinlock_t *root_lock = NULL;
 
 			head = head->next_sched;
 
-			if (!(q->flags & TCQ_F_NOLOCK)) {
-				root_lock = qdisc_lock(q);
-				spin_lock(root_lock);
-			}
 			/* We need to make sure head->next_sched is read
 			 * before clearing __QDISC_STATE_SCHED
 			 */
 			smp_mb__before_atomic();
+
+			if (!(q->flags & TCQ_F_NOLOCK)) {
+				root_lock = qdisc_lock(q);
+				spin_lock(root_lock);
+			} else if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
+						     &q->state))) {
+				/* There is a synchronize_net() between
+				 * STATE_DEACTIVATED flag being set and
+				 * qdisc_reset()/some_qdisc_is_busy() in
+				 * dev_deactivate(), so we can safely bail out
+				 * early here to avoid data race between
+				 * qdisc_deactivate() and some_qdisc_is_busy()
+				 * for lockless qdisc.
+				 */
+				clear_bit(__QDISC_STATE_SCHED, &q->state);
+				continue;
+			}
+
 			clear_bit(__QDISC_STATE_SCHED, &q->state);
 			qdisc_run(q);
 			if (root_lock)
 				spin_unlock(root_lock);
 		}
+
+		rcu_read_unlock();
 	}
 
 	xfrm_dev_backlog(sd);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
@@ -1177,8 +1177,10 @@ static void dev_reset_queue(struct net_device *dev,
 	qdisc_reset(qdisc);
 
 	spin_unlock_bh(qdisc_lock(qdisc));
-	if (nolock)
+	if (nolock) {
+		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
 		spin_unlock_bh(&qdisc->seqlock);
+	}
 }
 
 static bool some_qdisc_is_busy(struct net_device *dev)