mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 16:48:26 +02:00 
			
		
		
		
	 3a17f23f7c
			
		
	
	
		3a17f23f7c
		
	
	
	
	
		
			
			Executing dql_reset after setting a non-zero value for limit_min can lead to an inconsistent state in which dql->limit is less than dql->min_limit. For instance, after setting /sys/class/net/eth*/queues/tx-0/byte_queue_limits/limit_min, an ifconfig down/up operation might cause the ethernet driver to call netdev_tx_reset_queue, which in turn invokes dql_reset. In this case, dql->limit is reset to 0 while dql->min_limit keeps its non-zero value, which is unexpected. The limit should always be greater than or equal to min_limit. Signed-off-by: Jing Su <jingsusu@didiglobal.com> Link: https://patch.msgid.link/Z9qHD1s/NEuQBdgH@pilot-ThinkCentre-M930t-N000 Signed-off-by: Jakub Kicinski <kuba@kernel.org>
		
			
				
	
	
		
			217 lines
		
	
	
	
		
			6.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			217 lines
		
	
	
	
		
			6.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| // SPDX-License-Identifier: GPL-2.0
 | |
| /*
 | |
|  * Dynamic byte queue limits.  See include/linux/dynamic_queue_limits.h
 | |
|  *
 | |
|  * Copyright (c) 2011, Tom Herbert <therbert@google.com>
 | |
|  */
 | |
| #include <linux/types.h>
 | |
| #include <linux/kernel.h>
 | |
| #include <linux/jiffies.h>
 | |
| #include <linux/dynamic_queue_limits.h>
 | |
| #include <linux/compiler.h>
 | |
| #include <linux/export.h>
 | |
| #include <trace/events/napi.h>
 | |
| 
 | |
/*
 * POSDIFF(A, B): yields A - B when that difference, reinterpreted as a
 * signed int, is strictly positive; otherwise yields 0.  Both arguments
 * are expanded more than once, so they must be free of side effects.
 */
#define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)

/*
 * AFTER_EQ(A, B): true when A - B, reinterpreted as a signed int, is
 * greater than or equal to zero.
 */
#define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)
 | |
| 
 | |
| static void dql_check_stall(struct dql *dql, unsigned short stall_thrs)
 | |
| {
 | |
| 	unsigned long now;
 | |
| 
 | |
| 	if (!stall_thrs)
 | |
| 		return;
 | |
| 
 | |
| 	now = jiffies;
 | |
| 	/* Check for a potential stall */
 | |
| 	if (time_after_eq(now, dql->last_reap + stall_thrs)) {
 | |
| 		unsigned long hist_head, t, start, end;
 | |
| 
 | |
| 		/* We are trying to detect a period of at least @stall_thrs
 | |
| 		 * jiffies without any Tx completions, but during first half
 | |
| 		 * of which some Tx was posted.
 | |
| 		 */
 | |
| dqs_again:
 | |
| 		hist_head = READ_ONCE(dql->history_head);
 | |
| 		/* pairs with smp_wmb() in dql_queued() */
 | |
| 		smp_rmb();
 | |
| 
 | |
| 		/* Get the previous entry in the ring buffer, which is the
 | |
| 		 * oldest sample.
 | |
| 		 */
 | |
| 		start = (hist_head - DQL_HIST_LEN + 1) * BITS_PER_LONG;
 | |
| 
 | |
| 		/* Advance start to continue from the last reap time */
 | |
| 		if (time_before(start, dql->last_reap + 1))
 | |
| 			start = dql->last_reap + 1;
 | |
| 
 | |
| 		/* Newest sample we should have already seen a completion for */
 | |
| 		end = hist_head * BITS_PER_LONG + (BITS_PER_LONG - 1);
 | |
| 
 | |
| 		/* Shrink the search space to [start, (now - start_thrs/2)] if
 | |
| 		 * `end` is beyond the stall zone
 | |
| 		 */
 | |
| 		if (time_before(now, end + stall_thrs / 2))
 | |
| 			end = now - stall_thrs / 2;
 | |
| 
 | |
| 		/* Search for the queued time in [t, end] */
 | |
| 		for (t = start; time_before_eq(t, end); t++)
 | |
| 			if (test_bit(t % (DQL_HIST_LEN * BITS_PER_LONG),
 | |
| 				     dql->history))
 | |
| 				break;
 | |
| 
 | |
| 		/* Variable t contains the time of the queue */
 | |
| 		if (!time_before_eq(t, end))
 | |
| 			goto no_stall;
 | |
| 
 | |
| 		/* The ring buffer was modified in the meantime, retry */
 | |
| 		if (hist_head != READ_ONCE(dql->history_head))
 | |
| 			goto dqs_again;
 | |
| 
 | |
| 		dql->stall_cnt++;
 | |
| 		dql->stall_max = max_t(unsigned short, dql->stall_max, now - t);
 | |
| 
 | |
| 		trace_dql_stall_detected(dql->stall_thrs, now - t,
 | |
| 					 dql->last_reap, dql->history_head,
 | |
| 					 now, dql->history);
 | |
| 	}
 | |
| no_stall:
 | |
| 	dql->last_reap = now;
 | |
| }
 | |
| 
 | |
| /* Records completed count and recalculates the queue limit */
 | |
| void dql_completed(struct dql *dql, unsigned int count)
 | |
| {
 | |
| 	unsigned int inprogress, prev_inprogress, limit;
 | |
| 	unsigned int ovlimit, completed, num_queued;
 | |
| 	unsigned short stall_thrs;
 | |
| 	bool all_prev_completed;
 | |
| 
 | |
| 	num_queued = READ_ONCE(dql->num_queued);
 | |
| 	/* Read stall_thrs in advance since it belongs to the same (first)
 | |
| 	 * cache line as ->num_queued. This way, dql_check_stall() does not
 | |
| 	 * need to touch the first cache line again later, reducing the window
 | |
| 	 * of possible false sharing.
 | |
| 	 */
 | |
| 	stall_thrs = READ_ONCE(dql->stall_thrs);
 | |
| 
 | |
| 	/* Can't complete more than what's in queue */
 | |
| 	BUG_ON(count > num_queued - dql->num_completed);
 | |
| 
 | |
| 	completed = dql->num_completed + count;
 | |
| 	limit = dql->limit;
 | |
| 	ovlimit = POSDIFF(num_queued - dql->num_completed, limit);
 | |
| 	inprogress = num_queued - completed;
 | |
| 	prev_inprogress = dql->prev_num_queued - dql->num_completed;
 | |
| 	all_prev_completed = AFTER_EQ(completed, dql->prev_num_queued);
 | |
| 
 | |
| 	if ((ovlimit && !inprogress) ||
 | |
| 	    (dql->prev_ovlimit && all_prev_completed)) {
 | |
| 		/*
 | |
| 		 * Queue considered starved if:
 | |
| 		 *   - The queue was over-limit in the last interval,
 | |
| 		 *     and there is no more data in the queue.
 | |
| 		 *  OR
 | |
| 		 *   - The queue was over-limit in the previous interval and
 | |
| 		 *     when enqueuing it was possible that all queued data
 | |
| 		 *     had been consumed.  This covers the case when queue
 | |
| 		 *     may have becomes starved between completion processing
 | |
| 		 *     running and next time enqueue was scheduled.
 | |
| 		 *
 | |
| 		 *     When queue is starved increase the limit by the amount
 | |
| 		 *     of bytes both sent and completed in the last interval,
 | |
| 		 *     plus any previous over-limit.
 | |
| 		 */
 | |
| 		limit += POSDIFF(completed, dql->prev_num_queued) +
 | |
| 		     dql->prev_ovlimit;
 | |
| 		dql->slack_start_time = jiffies;
 | |
| 		dql->lowest_slack = UINT_MAX;
 | |
| 	} else if (inprogress && prev_inprogress && !all_prev_completed) {
 | |
| 		/*
 | |
| 		 * Queue was not starved, check if the limit can be decreased.
 | |
| 		 * A decrease is only considered if the queue has been busy in
 | |
| 		 * the whole interval (the check above).
 | |
| 		 *
 | |
| 		 * If there is slack, the amount of excess data queued above
 | |
| 		 * the amount needed to prevent starvation, the queue limit
 | |
| 		 * can be decreased.  To avoid hysteresis we consider the
 | |
| 		 * minimum amount of slack found over several iterations of the
 | |
| 		 * completion routine.
 | |
| 		 */
 | |
| 		unsigned int slack, slack_last_objs;
 | |
| 
 | |
| 		/*
 | |
| 		 * Slack is the maximum of
 | |
| 		 *   - The queue limit plus previous over-limit minus twice
 | |
| 		 *     the number of objects completed.  Note that two times
 | |
| 		 *     number of completed bytes is a basis for an upper bound
 | |
| 		 *     of the limit.
 | |
| 		 *   - Portion of objects in the last queuing operation that
 | |
| 		 *     was not part of non-zero previous over-limit.  That is
 | |
| 		 *     "round down" by non-overlimit portion of the last
 | |
| 		 *     queueing operation.
 | |
| 		 */
 | |
| 		slack = POSDIFF(limit + dql->prev_ovlimit,
 | |
| 		    2 * (completed - dql->num_completed));
 | |
| 		slack_last_objs = dql->prev_ovlimit ?
 | |
| 		    POSDIFF(dql->prev_last_obj_cnt, dql->prev_ovlimit) : 0;
 | |
| 
 | |
| 		slack = max(slack, slack_last_objs);
 | |
| 
 | |
| 		if (slack < dql->lowest_slack)
 | |
| 			dql->lowest_slack = slack;
 | |
| 
 | |
| 		if (time_after(jiffies,
 | |
| 			       dql->slack_start_time + dql->slack_hold_time)) {
 | |
| 			limit = POSDIFF(limit, dql->lowest_slack);
 | |
| 			dql->slack_start_time = jiffies;
 | |
| 			dql->lowest_slack = UINT_MAX;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/* Enforce bounds on limit */
 | |
| 	limit = clamp(limit, dql->min_limit, dql->max_limit);
 | |
| 
 | |
| 	if (limit != dql->limit) {
 | |
| 		dql->limit = limit;
 | |
| 		ovlimit = 0;
 | |
| 	}
 | |
| 
 | |
| 	dql->adj_limit = limit + completed;
 | |
| 	dql->prev_ovlimit = ovlimit;
 | |
| 	dql->prev_last_obj_cnt = READ_ONCE(dql->last_obj_cnt);
 | |
| 	dql->num_completed = completed;
 | |
| 	dql->prev_num_queued = num_queued;
 | |
| 
 | |
| 	dql_check_stall(dql, stall_thrs);
 | |
| }
 | |
| EXPORT_SYMBOL(dql_completed);
 | |
| 
 | |
| void dql_reset(struct dql *dql)
 | |
| {
 | |
| 	/* Reset all dynamic values */
 | |
| 	dql->limit = dql->min_limit;
 | |
| 	dql->num_queued = 0;
 | |
| 	dql->num_completed = 0;
 | |
| 	dql->last_obj_cnt = 0;
 | |
| 	dql->prev_num_queued = 0;
 | |
| 	dql->prev_last_obj_cnt = 0;
 | |
| 	dql->prev_ovlimit = 0;
 | |
| 	dql->lowest_slack = UINT_MAX;
 | |
| 	dql->slack_start_time = jiffies;
 | |
| 
 | |
| 	dql->last_reap = jiffies;
 | |
| 	dql->history_head = jiffies / BITS_PER_LONG;
 | |
| 	memset(dql->history, 0, sizeof(dql->history));
 | |
| }
 | |
| EXPORT_SYMBOL(dql_reset);
 | |
| 
 | |
| void dql_init(struct dql *dql, unsigned int hold_time)
 | |
| {
 | |
| 	dql->max_limit = DQL_MAX_LIMIT;
 | |
| 	dql->min_limit = 0;
 | |
| 	dql->slack_hold_time = hold_time;
 | |
| 	dql->stall_thrs = 0;
 | |
| 	dql_reset(dql);
 | |
| }
 | |
| EXPORT_SYMBOL(dql_init);
 |