	cpufreq: Add mechanism for registering utilization update callbacks
Introduce a mechanism by which parts of the cpufreq subsystem
("setpolicy" drivers or the core) can register callbacks to be
executed from cpufreq_update_util(), which is invoked by the
scheduler's update_load_avg() on CPU utilization changes.

This allows the "setpolicy" drivers to dispense with their timers
and carry out all of the computations they need, as well as
frequency/voltage adjustments, in the update_load_avg() code path,
among other things.

The update_load_avg() changes were suggested by Peter Zijlstra.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
			
			
commit  34e2c555f3
parent  de1df26b7c

6 changed files with 113 additions and 1 deletion
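For orientation before the diff, here is a minimal sketch of how a "setpolicy" driver or governor might consume the new hook: it embeds a struct update_util_data in its per-CPU state, fills in the callback, and publishes it with cpufreq_set_update_util_data(). The my_gov_* names and the per-CPU layout are illustrative assumptions, not code from this commit.

#include <linux/kernel.h>
#include <linux/cpufreq.h>
#include <linux/percpu.h>

/* Hypothetical per-CPU state for an example governor. */
struct my_gov_cpu {
	struct update_util_data update_util;	/* object registered with cpufreq */
	u64 last_update;
};

static DEFINE_PER_CPU(struct my_gov_cpu, my_gov_cpu_data);

/* Invoked from cpufreq_update_util() in an RCU read-side section: must not sleep. */
static void my_gov_update(struct update_util_data *data, u64 time,
			  unsigned long util, unsigned long max)
{
	struct my_gov_cpu *gcpu = container_of(data, struct my_gov_cpu, update_util);

	gcpu->last_update = time;
	/* ... evaluate util/max and request a new frequency/voltage here ... */
}

static void my_gov_start_cpu(unsigned int cpu)
{
	struct my_gov_cpu *gcpu = &per_cpu(my_gov_cpu_data, cpu);

	gcpu->update_util.func = my_gov_update;
	/* Publish the pointer; the scheduler may call back from now on. */
	cpufreq_set_update_util_data(cpu, &gcpu->update_util);
}

Because the callback runs in the scheduler's update path under rcu_read_lock(), it has to be cheap and non-sleeping; anything heavier (I/O, locks that may sleep) would need to be deferred, for example to an irq_work.
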
				
			
drivers/cpufreq/cpufreq.c:

@@ -102,6 +102,51 @@ static LIST_HEAD(cpufreq_governor_list);
 static struct cpufreq_driver *cpufreq_driver;
 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
 static DEFINE_RWLOCK(cpufreq_driver_lock);
+
+static DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
+
+/**
+ * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer.
+ * @cpu: The CPU to set the pointer for.
+ * @data: New pointer value.
+ *
+ * Set and publish the update_util_data pointer for the given CPU.  That pointer
+ * points to a struct update_util_data object containing a callback function
+ * to call from cpufreq_update_util().  That function will be called from an RCU
+ * read-side critical section, so it must not sleep.
+ *
+ * Callers must use RCU callbacks to free any memory that might be accessed
+ * via the old update_util_data pointer or invoke synchronize_rcu() right after
+ * this function to avoid use-after-free.
+ */
+void cpufreq_set_update_util_data(int cpu, struct update_util_data *data)
+{
+	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
+}
+EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data);
+
+/**
+ * cpufreq_update_util - Take a note about CPU utilization changes.
+ * @time: Current time.
+ * @util: Current utilization.
+ * @max: Utilization ceiling.
+ *
+ * This function is called by the scheduler on every invocation of
+ * update_load_avg() on the CPU whose utilization is being updated.
+ */
+void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
+{
+	struct update_util_data *data;
+
+	rcu_read_lock();
+
+	data = rcu_dereference(*this_cpu_ptr(&cpufreq_update_util_data));
+	if (data && data->func)
+		data->func(data, time, util, max);
+
+	rcu_read_unlock();
+}
+
 DEFINE_MUTEX(cpufreq_governor_lock);
 
 /* Flag to suspend/resume CPUFreq governors */

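The kerneldoc above carries the one rule consumers must not get wrong: once the pointer has been replaced, memory reachable through the old update_util_data must stay valid until all RCU readers are done. A hedged sketch of the unregistration side, assuming a dynamically allocated callback object (the my_gov_* names are again hypothetical):

#include <linux/cpufreq.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_gov_hook {
	struct update_util_data update_util;
	/* ... other per-CPU governor state ... */
};

static void my_gov_stop_cpu(unsigned int cpu, struct my_gov_hook *hook)
{
	/* Stop new invocations of the callback on this CPU. */
	cpufreq_set_update_util_data(cpu, NULL);

	/*
	 * Wait for a callback that may still be running inside the RCU
	 * read-side section in cpufreq_update_util() to finish ...
	 */
	synchronize_rcu();

	/* ... and only then free the memory it may have been using. */
	kfree(hook);
}
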
include/linux/cpufreq.h:

@@ -151,6 +151,36 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy)
 extern struct kobject *cpufreq_global_kobject;
 
 #ifdef CONFIG_CPU_FREQ
+void cpufreq_update_util(u64 time, unsigned long util, unsigned long max);
+
+/**
+ * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
+ * @time: Current time.
+ *
+ * The way cpufreq is currently arranged requires it to evaluate the CPU
+ * performance state (frequency/voltage) on a regular basis to prevent it from
+ * being stuck in a completely inadequate performance level for too long.
+ * That is not guaranteed to happen if the updates are only triggered from CFS,
+ * though, because they may not be coming in if RT or deadline tasks are active
+ * all the time (or there are RT and DL tasks only).
+ *
+ * As a workaround for that issue, this function is called by the RT and DL
+ * sched classes to trigger extra cpufreq updates to prevent it from stalling,
+ * but that really is a band-aid.  Going forward it should be replaced with
+ * solutions targeted more specifically at RT and DL tasks.
+ */
+static inline void cpufreq_trigger_update(u64 time)
+{
+	cpufreq_update_util(time, ULONG_MAX, 0);
+}
+
+struct update_util_data {
+	void (*func)(struct update_util_data *data,
+		     u64 time, unsigned long util, unsigned long max);
+};
+
+void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);
+
 unsigned int cpufreq_get(unsigned int cpu);
 unsigned int cpufreq_quick_get(unsigned int cpu);
 unsigned int cpufreq_quick_get_max(unsigned int cpu);
@@ -162,6 +192,10 @@ int cpufreq_update_policy(unsigned int cpu);
 bool have_governor_per_policy(void);
 struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
 #else
+static inline void cpufreq_update_util(u64 time, unsigned long util,
+				       unsigned long max) {}
+static inline void cpufreq_trigger_update(u64 time) {}
+
 static inline unsigned int cpufreq_get(unsigned int cpu)
 {
 	return 0;

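Note the argument values cpufreq_trigger_update() feeds into the callback: util == ULONG_MAX and max == 0. A callback can use that to tell an RT/DL "kick" apart from a regular CFS utilization update. The sketch below shows one plausible way a consumer might react; the my_gov_* names, the max_freq_khz field, and the go-to-max policy for the RT/DL case are assumptions, not something this commit prescribes.

/* Hypothetical per-policy state for an example governor. */
struct my_gov_policy {
	struct update_util_data update_util;
	unsigned int max_freq_khz;	/* assumed hardware limit */
};

static void my_gov_util_hook(struct update_util_data *data, u64 time,
			     unsigned long util, unsigned long max)
{
	struct my_gov_policy *pol = container_of(data, struct my_gov_policy,
						 update_util);
	unsigned int next_khz;

	if (util == ULONG_MAX) {
		/*
		 * Came in via cpufreq_trigger_update() (RT or DL activity):
		 * there is no usable utilization figure, so one simple
		 * policy is to request the maximum performance state.
		 */
		next_khz = pol->max_freq_khz;
	} else {
		/* Regular CFS update: scale the request by util/max. */
		next_khz = pol->max_freq_khz * util / max;
	}

	/* ... hand next_khz to the driver's frequency-switching path ... */
}
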
kernel/sched/deadline.c:

@@ -726,6 +726,10 @@ static void update_curr_dl(struct rq *rq)
 	if (!dl_task(curr) || !on_dl_rq(dl_se))
 		return;
 
+	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
+	if (cpu_of(rq) == smp_processor_id())
+		cpufreq_trigger_update(rq_clock(rq));
+
 	/*
 	 * Consumed budget is computed considering the time as
 	 * observed by schedulable tasks (excluding time spent

kernel/sched/fair.c:

@@ -2824,7 +2824,8 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	u64 now = cfs_rq_clock_task(cfs_rq);
-	int cpu = cpu_of(rq_of(cfs_rq));
+	struct rq *rq = rq_of(cfs_rq);
+	int cpu = cpu_of(rq);
 
 	/*
 	 * Track task load average for carrying it to new CPU after migrated, and
@@ -2836,6 +2837,29 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
 
 	if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
 		update_tg_load_avg(cfs_rq, 0);
+
+	if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
+		unsigned long max = rq->cpu_capacity_orig;
+
+		/*
+		 * There are a few boundary cases this might miss but it should
+		 * get called often enough that that should (hopefully) not be
+		 * a real problem -- added to that it only calls on the local
+		 * CPU, so if we enqueue remotely we'll miss an update, but
+		 * the next tick/schedule should update.
+		 *
+		 * It will not get called when we go idle, because the idle
+		 * thread is a different class (!fair), nor will the utilization
+		 * number include things like RT tasks.
+		 *
+		 * As is, the util number is not freq-invariant (we'd have to
+		 * implement arch_scale_freq_capacity() for that).
+		 *
+		 * See cpu_util().
+		 */
+		cpufreq_update_util(rq_clock(rq),
+				    min(cfs_rq->avg.util_avg, max), max);
+	}
 }
 
 static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)

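The "not freq-invariant" remark in the comment above means util_avg is accumulated in running time at whatever frequency the CPU happened to be at, so a CPU that is fully busy at half speed looks about as utilized as one fully busy at full speed. The usual remedy, hinted at by the reference to arch_scale_freq_capacity(), is to scale each tracked running delta by the current/max frequency ratio. A rough illustration of that scaling, not the kernel's actual PELT code:

/*
 * Roughly: an architecture implementing arch_scale_freq_capacity() reports
 * SCHED_CAPACITY_SCALE * curr_freq / max_freq, and the load-tracking code
 * multiplies each running-time delta by that ratio, so util_avg ends up
 * reflecting work done rather than time spent running.
 */
#define EXAMPLE_CAPACITY_SCALE	1024UL	/* SCHED_CAPACITY_SCALE in the scheduler */

static unsigned long freq_invariant_delta(unsigned long delta,
					  unsigned long curr_freq,
					  unsigned long max_freq)
{
	unsigned long scale = EXAMPLE_CAPACITY_SCALE * curr_freq / max_freq;

	return delta * scale / EXAMPLE_CAPACITY_SCALE;	/* delta * curr_freq / max_freq */
}
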
kernel/sched/rt.c:

@@ -945,6 +945,10 @@ static void update_curr_rt(struct rq *rq)
 	if (curr->sched_class != &rt_sched_class)
 		return;
 
+	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
+	if (cpu_of(rq) == smp_processor_id())
+		cpufreq_trigger_update(rq_clock(rq));
+
 	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
 	if (unlikely((s64)delta_exec <= 0))
 		return;

kernel/sched/sched.h:

@@ -9,6 +9,7 @@
 #include <linux/irq_work.h>
 #include <linux/tick.h>
 #include <linux/slab.h>
+#include <linux/cpufreq.h>
 
 #include "cpupri.h"
 #include "cpudeadline.h"