mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	sched/eevdf: Fix vruntime adjustment on reweight
The vruntime of an (on_rq && !0-lag) entity needs to be adjusted when
it gets re-weighted, and the calculations can be simplified based
on the fact that re-weight won't change the weighted average vruntime
of all the entities. Please check the proofs in the comments.
But adjusting vruntime can also change the entity's position in the
RB-tree, hence requiring a re-queue to fix it up, which might be costly.
This might be avoided by deferring the adjustment to the time the entity
actually leaves the tree (dequeue/pick), but that would negatively affect
task selection and is probably not good enough either.
Fixes: 147f3efaa2 ("sched/fair: Implement an EEVDF-like scheduling policy")
Signed-off-by: Abel Wu <wuyun.abel@bytedance.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20231107090510.71322-2-wuyun.abel@bytedance.com
			
			
This commit is contained in:
		
							parent
							
								
									b85ea95d08
								
							
						
					
					
						commit
						eab03c23c2
					
				
					 1 changed files with 128 additions and 23 deletions
				
			
		| 
						 | 
					@ -3666,41 +3666,140 @@ static inline void
 | 
				
			||||||
dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
 | 
					dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 | 
					static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
 | 
				
			||||||
			   unsigned long weight)
 | 
								   unsigned long weight)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long old_weight = se->load.weight;
 | 
						unsigned long old_weight = se->load.weight;
 | 
				
			||||||
 | 
						u64 avruntime = avg_vruntime(cfs_rq);
 | 
				
			||||||
 | 
						s64 vlag, vslice;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * VRUNTIME
 | 
				
			||||||
 | 
						 * ========
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * COROLLARY #1: The virtual runtime of the entity needs to be
 | 
				
			||||||
 | 
						 * adjusted if re-weight at !0-lag point.
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * Proof: For contradiction assume this is not true, so we can
 | 
				
			||||||
 | 
						 * re-weight without changing vruntime at !0-lag point.
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 *             Weight	VRuntime   Avg-VRuntime
 | 
				
			||||||
 | 
						 *     before    w          v            V
 | 
				
			||||||
 | 
						 *      after    w'         v'           V'
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * Since lag needs to be preserved through re-weight:
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 *	lag = (V - v)*w = (V'- v')*w', where v = v'
 | 
				
			||||||
 | 
						 *	==>	V' = (V - v)*w/w' + v		(1)
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * Let W be the total weight of the entities before reweight,
 | 
				
			||||||
 | 
						 * since V' is the new weighted average of entities:
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 *	V' = (WV + w'v - wv) / (W + w' - w)	(2)
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * by using (1) & (2) we obtain:
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 *	(WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v
 | 
				
			||||||
 | 
						 *	==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v
 | 
				
			||||||
 | 
						 *	==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v
 | 
				
			||||||
 | 
						 *	==>	(V - v)*W/(W + w' - w) = (V - v)*w/w' (3)
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * Since we are doing at !0-lag point which means V != v, we
 | 
				
			||||||
 | 
						 * can simplify (3):
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 *	==>	W / (W + w' - w) = w / w'
 | 
				
			||||||
 | 
						 *	==>	Ww' = Ww + ww' - ww
 | 
				
			||||||
 | 
						 *	==>	W * (w' - w) = w * (w' - w)
 | 
				
			||||||
 | 
						 *	==>	W = w	(re-weight indicates w' != w)
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * So the cfs_rq contains only one entity, hence vruntime of
 | 
				
			||||||
 | 
						 * the entity @v should always equal to the cfs_rq's weighted
 | 
				
			||||||
 | 
						 * average vruntime @V, which means we will always re-weight
 | 
				
			||||||
 | 
						 * at 0-lag point, thus breaching the assumption. Proof completed.
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * COROLLARY #2: Re-weight does NOT affect weighted average
 | 
				
			||||||
 | 
						 * vruntime of all the entities.
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * Proof: According to corollary #1, Eq. (1) should be:
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 *	(V - v)*w = (V' - v')*w'
 | 
				
			||||||
 | 
						 *	==>    v' = V' - (V - v)*w/w'		(4)
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * According to the weighted average formula, we have:
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 *	V' = (WV - wv + w'v') / (W - w + w')
 | 
				
			||||||
 | 
						 *	   = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w')
 | 
				
			||||||
 | 
						 *	   = (WV - wv + w'V' - Vw + wv) / (W - w + w')
 | 
				
			||||||
 | 
						 *	   = (WV + w'V' - Vw) / (W - w + w')
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 *	==>  V'*(W - w + w') = WV + w'V' - Vw
 | 
				
			||||||
 | 
						 *	==>	V' * (W - w) = (W - w) * V	(5)
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * If the entity is the only one in the cfs_rq, then reweight
 | 
				
			||||||
 | 
						 * always occurs at 0-lag point, so V won't change. Or else
 | 
				
			||||||
 | 
						 * there are other entities, hence W != w, then Eq. (5) turns
 | 
				
			||||||
 | 
						 * into V' = V. So V won't change in either case, proof done.
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * So according to corollary #1 & #2, the effect of re-weight
 | 
				
			||||||
 | 
						 * on vruntime should be:
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 *	v' = V' - (V - v) * w / w'		(4)
 | 
				
			||||||
 | 
						 *	   = V  - (V - v) * w / w'
 | 
				
			||||||
 | 
						 *	   = V  - vl * w / w'
 | 
				
			||||||
 | 
						 *	   = V  - vl'
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (avruntime != se->vruntime) {
 | 
				
			||||||
 | 
							vlag = (s64)(avruntime - se->vruntime);
 | 
				
			||||||
 | 
							vlag = div_s64(vlag * old_weight, weight);
 | 
				
			||||||
 | 
							se->vruntime = avruntime - vlag;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * DEADLINE
 | 
				
			||||||
 | 
						 * ========
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * When the weight changes, the virtual time slope changes and
 | 
				
			||||||
 | 
						 * we should adjust the relative virtual deadline accordingly.
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 *	d' = v' + (d - v)*w/w'
 | 
				
			||||||
 | 
						 *	   = V' - (V - v)*w/w' + (d - v)*w/w'
 | 
				
			||||||
 | 
						 *	   = V  - (V - v)*w/w' + (d - v)*w/w'
 | 
				
			||||||
 | 
						 *	   = V  + (d - V)*w/w'
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						vslice = (s64)(se->deadline - avruntime);
 | 
				
			||||||
 | 
						vslice = div_s64(vslice * old_weight, weight);
 | 
				
			||||||
 | 
						se->deadline = avruntime + vslice;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 | 
				
			||||||
 | 
								    unsigned long weight)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						bool curr = cfs_rq->curr == se;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (se->on_rq) {
 | 
						if (se->on_rq) {
 | 
				
			||||||
		/* commit outstanding execution time */
 | 
							/* commit outstanding execution time */
 | 
				
			||||||
		if (cfs_rq->curr == se)
 | 
							if (curr)
 | 
				
			||||||
			update_curr(cfs_rq);
 | 
								update_curr(cfs_rq);
 | 
				
			||||||
		else
 | 
							else
 | 
				
			||||||
			avg_vruntime_sub(cfs_rq, se);
 | 
								__dequeue_entity(cfs_rq, se);
 | 
				
			||||||
		update_load_sub(&cfs_rq->load, se->load.weight);
 | 
							update_load_sub(&cfs_rq->load, se->load.weight);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	dequeue_load_avg(cfs_rq, se);
 | 
						dequeue_load_avg(cfs_rq, se);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	update_load_set(&se->load, weight);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (!se->on_rq) {
 | 
						if (!se->on_rq) {
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
 | 
							 * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
 | 
				
			||||||
		 * we need to scale se->vlag when w_i changes.
 | 
							 * we need to scale se->vlag when w_i changes.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		se->vlag = div_s64(se->vlag * old_weight, weight);
 | 
							se->vlag = div_s64(se->vlag * se->load.weight, weight);
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		s64 deadline = se->deadline - se->vruntime;
 | 
							reweight_eevdf(cfs_rq, se, weight);
 | 
				
			||||||
		/*
 | 
					 | 
				
			||||||
		 * When the weight changes, the virtual time slope changes and
 | 
					 | 
				
			||||||
		 * we should adjust the relative virtual deadline accordingly.
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
		deadline = div_s64(deadline * old_weight, weight);
 | 
					 | 
				
			||||||
		se->deadline = se->vruntime + deadline;
 | 
					 | 
				
			||||||
		if (se != cfs_rq->curr)
 | 
					 | 
				
			||||||
			min_deadline_cb_propagate(&se->run_node, NULL);
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						update_load_set(&se->load, weight);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_SMP
 | 
					#ifdef CONFIG_SMP
 | 
				
			||||||
	do {
 | 
						do {
 | 
				
			||||||
		u32 divider = get_pelt_divider(&se->avg);
 | 
							u32 divider = get_pelt_divider(&se->avg);
 | 
				
			||||||
| 
						 | 
					@ -3712,8 +3811,17 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 | 
				
			||||||
	enqueue_load_avg(cfs_rq, se);
 | 
						enqueue_load_avg(cfs_rq, se);
 | 
				
			||||||
	if (se->on_rq) {
 | 
						if (se->on_rq) {
 | 
				
			||||||
		update_load_add(&cfs_rq->load, se->load.weight);
 | 
							update_load_add(&cfs_rq->load, se->load.weight);
 | 
				
			||||||
		if (cfs_rq->curr != se)
 | 
							if (!curr) {
 | 
				
			||||||
			avg_vruntime_add(cfs_rq, se);
 | 
								/*
 | 
				
			||||||
 | 
								 * The entity's vruntime has been adjusted, so let's check
 | 
				
			||||||
 | 
								 * whether the rq-wide min_vruntime needs to be updated too. Since
 | 
				
			||||||
 | 
								 * the calculations above require stable min_vruntime rather
 | 
				
			||||||
 | 
								 * than up-to-date one, we do the update at the end of the
 | 
				
			||||||
 | 
								 * reweight process.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								__enqueue_entity(cfs_rq, se);
 | 
				
			||||||
 | 
								update_min_vruntime(cfs_rq);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3857,13 +3965,10 @@ static void update_cfs_group(struct sched_entity *se)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef CONFIG_SMP
 | 
					#ifndef CONFIG_SMP
 | 
				
			||||||
	shares = READ_ONCE(gcfs_rq->tg->shares);
 | 
						shares = READ_ONCE(gcfs_rq->tg->shares);
 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (likely(se->load.weight == shares))
 | 
					 | 
				
			||||||
		return;
 | 
					 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
	shares = calc_group_shares(gcfs_rq);
 | 
						shares = calc_group_shares(gcfs_rq);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
						if (unlikely(se->load.weight != shares))
 | 
				
			||||||
		reweight_entity(cfs_rq_of(se), se, shares);
 | 
							reweight_entity(cfs_rq_of(se), se, shares);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue