Mirror of https://github.com/torvalds/linux.git, synced 2025-11-04 02:30:34 +02:00
	sched/fair: Align PELT windows between cfs_rq and its se
The PELT _sum values are a saw-tooth function, dropping on the decay edge and
then growing back up again during the window.

When these window-edges are not aligned between cfs_rq and se, we can have the
situation where, for example, on dequeue, the se decays first. Its _sum values
will be small(er), while the cfs_rq _sum values will still be on their way up.
Because of this, the subtraction:

	cfs_rq->avg._sum -= se->avg._sum

will result in a positive value. This will then, once the cfs_rq reaches an
edge, translate into its _avg value jumping up.

This is especially visible with the runnable_load bits, since they get
added/subtracted a lot.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
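To make the effect described in the changelog concrete, here is a small
user-space toy model (illustrative only, not kernel code; the decay constant,
window handling and variable names are simplifications). It tracks one
always-running entity in two PELT-style sums whose 1024us decay windows are
offset by half a window, then "dequeues" the entity right after the se-side
edge. The subtraction leaves a positive leftover in the cfs_rq-side sum, which
is the residue that later shows up as an _avg jump; aligning the two windows,
as this patch does on attach, removes that leftover.

/*
 * Toy model, NOT kernel code: one always-running entity tracked by two
 * PELT-style sums whose 1024us decay windows are offset by half a
 * window.  All names and constants here are illustrative.
 */
#include <stdio.h>

#define WINDOW	1024				/* PELT window, in us */
#define NWIN	200				/* simulate 200 windows */

static const double y = 0.978572;		/* per-window decay, y^32 ~= 0.5 */

int main(void)
{
	double se_sum = 0.0, cfs_sum = 0.0;
	int t;

	/* advance time in half-windows; the entity runs the whole time */
	for (t = 512; t <= NWIN * WINDOW; t += 512) {
		se_sum += 512;
		cfs_sum += 512;

		if (t % WINDOW == 0)		/* se's decay edge */
			se_sum *= y;
		else				/* cfs_rq's edge, 512us later */
			cfs_sum *= y;
	}

	/*
	 * "Dequeue" right after an se decay edge: se_sum was just decayed,
	 * cfs_sum is still climbing towards its next edge, so subtracting
	 * the se contribution leaves a positive leftover in the (now empty)
	 * cfs_rq sum.  Once the cfs_rq reaches its own edge, that leftover
	 * shows up as an _avg jump.
	 */
	printf("se_sum    = %8.1f\n", se_sum);
	printf("cfs_sum   = %8.1f\n", cfs_sum);
	printf("leftover  = %8.1f\n", cfs_sum - se_sum);
	return 0;
}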
parent 144d8487bc
commit f207934fb7

1 changed file with 31 additions and 14 deletions
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -716,13 +716,8 @@ void init_entity_runnable_average(struct sched_entity *se)
 {
 	struct sched_avg *sa = &se->avg;
 
-	sa->last_update_time = 0;
-	/*
-	 * sched_avg's period_contrib should be strictly less then 1024, so
-	 * we give it 1023 to make sure it is almost a period (1024us), and
-	 * will definitely be update (after enqueue).
-	 */
-	sa->period_contrib = 1023;
+	memset(sa, 0, sizeof(*sa));
+
 	/*
 	 * Tasks are intialized with full load to be seen as heavy tasks until
 	 * they get a chance to stabilize to their real load level.
@@ -731,13 +726,9 @@ void init_entity_runnable_average(struct sched_entity *se)
 	 */
 	if (entity_is_task(se))
 		sa->runnable_load_avg = sa->load_avg = scale_load_down(se->load.weight);
-	sa->runnable_load_sum = sa->load_sum = LOAD_AVG_MAX;
-	/*
-	 * At this point, util_avg won't be used in select_task_rq_fair anyway
-	 */
-	sa->util_avg = 0;
-	sa->util_sum = 0;
+
 	se->runnable_weight = se->load.weight;
+
 	/* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
 }
 
@@ -785,7 +776,6 @@ void post_init_entity_util_avg(struct sched_entity *se)
 		} else {
 			sa->util_avg = cap;
 		}
-		sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
 	}
 
 	if (entity_is_task(se)) {
@@ -3632,7 +3622,34 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
  */
 static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+	u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
+
+	/*
+	 * When we attach the @se to the @cfs_rq, we must align the decay
+	 * window because without that, really weird and wonderful things can
+	 * happen.
+	 *
+	 * XXX illustrate
+	 */
 	se->avg.last_update_time = cfs_rq->avg.last_update_time;
+	se->avg.period_contrib = cfs_rq->avg.period_contrib;
+
+	/*
+	 * Hell(o) Nasty stuff.. we need to recompute _sum based on the new
+	 * period_contrib. This isn't strictly correct, but since we're
+	 * entirely outside of the PELT hierarchy, nobody cares if we truncate
+	 * _sum a little.
+	 */
+	se->avg.util_sum = se->avg.util_avg * divider;
+
+	se->avg.load_sum = divider;
+	if (se_weight(se)) {
+		se->avg.load_sum =
+			div_u64(se->avg.load_avg * se->avg.load_sum, se_weight(se));
+	}
+
+	se->avg.runnable_load_sum = se->avg.load_sum;
+
 	enqueue_load_avg(cfs_rq, se);
 	cfs_rq->avg.util_avg += se->avg.util_avg;
 	cfs_rq->avg.util_sum += se->avg.util_sum;
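The interesting part of the last hunk is the divider. A PELT _sum only
saturates at LOAD_AVG_MAX at the very end of a window; with period_contrib
microseconds of the current window already elapsed, the largest reachable _sum
is LOAD_AVG_MAX - 1024 + period_contrib. So once the se inherits the cfs_rq's
period_contrib, its _sum values are rebuilt from the _avg values against that
divider (load_sum is additionally divided by the weight, since it is tracked
unweighted). The user-space sketch below redoes that arithmetic; only
LOAD_AVG_MAX matches the kernel constant, the example values and helper names
are made up for illustration.

/*
 * User-space sketch of the _sum reconstruction done in the hunk above;
 * only LOAD_AVG_MAX is the real kernel constant, the rest is illustrative.
 */
#include <stdio.h>
#include <stdint.h>

#define LOAD_AVG_MAX	47742	/* maximum possible _sum: ~1024 / (1 - y) */

int main(void)
{
	uint32_t contrib  = 200;	/* period_contrib inherited from the cfs_rq */
	uint32_t util_avg = 300;	/* example _avg values */
	uint32_t load_avg = 512;
	uint32_t weight   = 1024;	/* stand-in for se_weight(se) */

	/* largest _sum reachable 'contrib' us into the current window */
	uint32_t divider = LOAD_AVG_MAX - 1024 + contrib;

	/* _avg ~= _sum / divider, so rebuild _sum as _avg * divider */
	uint64_t util_sum = (uint64_t)util_avg * divider;

	/* load_sum is kept unweighted, hence the division by the weight */
	uint64_t load_sum = weight ? (uint64_t)load_avg * divider / weight : divider;

	printf("divider  = %u\n", divider);
	printf("util_sum = %llu\n", (unsigned long long)util_sum);
	printf("load_sum = %llu\n", (unsigned long long)load_sum);
	return 0;
}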