mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 16:48:26 +02:00 
			
		
		
		
	sched/fair: Block nohz tick_stop when cfs bandwidth in use
CFS bandwidth limits and NOHZ full don't play well together. Tasks can easily run well past their quotas before a remote tick does accounting. This leads to long, multi-period stalls before such tasks can run again. Currently, when presented with these conflicting requirements the scheduler is favoring nohz_full and letting the tick be stopped. However, nohz tick stopping is already best-effort, there are a number of conditions that can prevent it, whereas cfs runtime bandwidth is expected to be enforced.

Make the scheduler favor bandwidth over stopping the tick by setting TICK_DEP_BIT_SCHED when the only running task is a cfs task with runtime limit enabled. We use cfs_b->hierarchical_quota to determine if the task requires the tick.

Add check in pick_next_task_fair() as well since that is where we have a handle on the task that is actually going to be running.

Add check in sched_can_stop_tick() to cover some edge cases such as nr_running going from 2->1 and the 1 remains the running task.

Reviewed-By: Ben Segall <bsegall@google.com>
Signed-off-by: Phil Auld <pauld@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230712133357.381137-3-pauld@redhat.com
This commit is contained in:
		
							parent
							
								
									c98c18270b
								
							
						
					
					
						commit
						88c56cfeae
					
				
					 4 changed files with 81 additions and 1 deletions
				
			
		|  | @ -1194,6 +1194,20 @@ static void nohz_csd_func(void *info) | |||
| #endif /* CONFIG_NO_HZ_COMMON */ | ||||
| 
 | ||||
| #ifdef CONFIG_NO_HZ_FULL | ||||
| static inline bool __need_bw_check(struct rq *rq, struct task_struct *p) | ||||
| { | ||||
| 	if (rq->nr_running != 1) | ||||
| 		return false; | ||||
| 
 | ||||
| 	if (p->sched_class != &fair_sched_class) | ||||
| 		return false; | ||||
| 
 | ||||
| 	if (!task_on_rq_queued(p)) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| bool sched_can_stop_tick(struct rq *rq) | ||||
| { | ||||
| 	int fifo_nr_running; | ||||
|  | @ -1229,6 +1243,18 @@ bool sched_can_stop_tick(struct rq *rq) | |||
| 	if (rq->nr_running > 1) | ||||
| 		return false; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If there is one task and it has CFS runtime bandwidth constraints | ||||
| 	 * and it's on the cpu now we don't want to stop the tick. | ||||
| 	 * This check prevents clearing the bit if a newly enqueued task here is | ||||
| 	 * dequeued by migrating while the constrained task continues to run. | ||||
| 	 * E.g. going from 2->1 without going through pick_next_task(). | ||||
| 	 */ | ||||
| 	if (sched_feat(HZ_BW) && __need_bw_check(rq, rq->curr)) { | ||||
| 		if (cfs_task_bw_constrained(rq->curr)) | ||||
| 			return false; | ||||
| 	} | ||||
| 
 | ||||
| 	return true; | ||||
| } | ||||
| #endif /* CONFIG_NO_HZ_FULL */ | ||||
|  |  | |||
|  | @ -6189,6 +6189,46 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq) | |||
| 	rq_clock_stop_loop_update(rq); | ||||
| } | ||||
| 
 | ||||
| bool cfs_task_bw_constrained(struct task_struct *p) | ||||
| { | ||||
| 	struct cfs_rq *cfs_rq = task_cfs_rq(p); | ||||
| 
 | ||||
| 	if (!cfs_bandwidth_used()) | ||||
| 		return false; | ||||
| 
 | ||||
| 	if (cfs_rq->runtime_enabled || | ||||
| 	    tg_cfs_bandwidth(cfs_rq->tg)->hierarchical_quota != RUNTIME_INF) | ||||
| 		return true; | ||||
| 
 | ||||
| 	return false; | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_NO_HZ_FULL | ||||
| /* called from pick_next_task_fair() */ | ||||
| static void sched_fair_update_stop_tick(struct rq *rq, struct task_struct *p) | ||||
| { | ||||
| 	int cpu = cpu_of(rq); | ||||
| 
 | ||||
| 	if (!sched_feat(HZ_BW) || !cfs_bandwidth_used()) | ||||
| 		return; | ||||
| 
 | ||||
| 	if (!tick_nohz_full_cpu(cpu)) | ||||
| 		return; | ||||
| 
 | ||||
| 	if (rq->nr_running != 1) | ||||
| 		return; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 *  We know there is only one task runnable and we've just picked it. The | ||||
| 	 *  normal enqueue path will have cleared TICK_DEP_BIT_SCHED if we will | ||||
| 	 *  be otherwise able to stop the tick. Just need to check if we are using | ||||
| 	 *  bandwidth control. | ||||
| 	 */ | ||||
| 	if (cfs_task_bw_constrained(p)) | ||||
| 		tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| #else /* CONFIG_CFS_BANDWIDTH */ | ||||
| 
 | ||||
| static inline bool cfs_bandwidth_used(void) | ||||
|  | @ -6231,9 +6271,18 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) | |||
| static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} | ||||
| static inline void update_runtime_enabled(struct rq *rq) {} | ||||
| static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {} | ||||
| 
 | ||||
| #ifdef CONFIG_CGROUP_SCHED | ||||
| bool cfs_task_bw_constrained(struct task_struct *p) | ||||
| { | ||||
| 	return false; | ||||
| } | ||||
| #endif | ||||
| #endif /* CONFIG_CFS_BANDWIDTH */ | ||||
| 
 | ||||
| #if !defined(CONFIG_CFS_BANDWIDTH) || !defined(CONFIG_NO_HZ_FULL) | ||||
/*
 * Empty stand-in used when either CONFIG_CFS_BANDWIDTH or CONFIG_NO_HZ_FULL
 * is not defined, so the caller needs no conditional compilation.
 */
static inline void
sched_fair_update_stop_tick(struct rq *rq, struct task_struct *p)
{
}
| #endif | ||||
| 
 | ||||
| /**************************************************
 | ||||
|  * CFS operations on tasks: | ||||
|  */ | ||||
|  | @ -8201,6 +8250,7 @@ done: __maybe_unused; | |||
| 		hrtick_start_fair(rq, p); | ||||
| 
 | ||||
| 	update_misfit_status(p, rq); | ||||
| 	sched_fair_update_stop_tick(rq, p); | ||||
| 
 | ||||
| 	return p; | ||||
| 
 | ||||
|  |  | |||
|  | @ -101,3 +101,5 @@ SCHED_FEAT(LATENCY_WARN, false) | |||
| 
 | ||||
| SCHED_FEAT(ALT_PERIOD, true) | ||||
| SCHED_FEAT(BASE_SLICE, true) | ||||
| 
 | ||||
/*
 * HZ_BW: when set, the scheduler refuses to stop the tick for a lone
 * fair-class task that is subject to CFS runtime bandwidth limits.
 * Tested via sched_feat(HZ_BW).
 */
| SCHED_FEAT(HZ_BW, true) | ||||
|  |  | |||
|  | @ -459,6 +459,7 @@ extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth | |||
| extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b); | ||||
| extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b); | ||||
| extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq); | ||||
| extern bool cfs_task_bw_constrained(struct task_struct *p); | ||||
| 
 | ||||
| extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, | ||||
| 		struct sched_rt_entity *rt_se, int cpu, | ||||
|  | @ -494,6 +495,7 @@ static inline void set_task_rq_fair(struct sched_entity *se, | |||
| #else /* CONFIG_CGROUP_SCHED */ | ||||
| 
 | ||||
| struct cfs_bandwidth { }; | ||||
| static inline bool cfs_task_bw_constrained(struct task_struct *p) { return false; } | ||||
| 
 | ||||
| #endif	/* CONFIG_CGROUP_SCHED */ | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Phil Auld
						Phil Auld