mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	cpufreq: intel_pstate: Generic governors support
There may be reasons to use generic cpufreq governors (e.g. schedutil) on Intel platforms instead of the intel_pstate driver's internal governor. However, that currently can only be done by disabling intel_pstate altogether and using the acpi-cpufreq driver instead, which is subject to limitations. First of all, acpi-cpufreq only works on systems where the _PSS object is present in the ACPI tables for all logical CPUs. Second, on those systems acpi-cpufreq will only use frequencies listed by _PSS, which may be suboptimal. In particular, by convention, the whole turbo range is represented in _PSS as a single P-state and the frequency assigned to it is greater by 1 MHz than the greatest non-turbo frequency listed by _PSS. That may mislead governors into using turbo frequencies less often than they should, which may lead to suboptimal performance. For this reason, make it possible to use the intel_pstate driver with generic cpufreq governors as a "normal" cpufreq driver. That mode is enforced by adding intel_pstate=passive to the kernel command line and cannot be disabled at run time. In that mode, intel_pstate provides a cpufreq driver interface including the ->target() and ->fast_switch() callbacks and is listed in scaling_driver as "intel_cpufreq". Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Doug Smythies <dsmythies@telus.net>
This commit is contained in:
		
							parent
							
								
									d0ea59e188
								
							
						
					
					
						commit
						001c76f05b
					
				
					 2 changed files with 181 additions and 29 deletions
				
			
		| 
						 | 
				
			
			@ -1760,6 +1760,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 | 
			
		|||
		       disable
 | 
			
		||||
		         Do not enable intel_pstate as the default
 | 
			
		||||
		         scaling driver for the supported processors
 | 
			
		||||
		       passive
 | 
			
		||||
			 Use intel_pstate as a scaling driver, but configure it
 | 
			
		||||
			 to work with generic cpufreq governors (instead of
 | 
			
		||||
			 enabling its internal governor).  This mode cannot be
 | 
			
		||||
			 used along with the hardware-managed P-states (HWP)
 | 
			
		||||
			 feature.
 | 
			
		||||
		       force
 | 
			
		||||
			 Enable intel_pstate on systems that prohibit it by default
 | 
			
		||||
			 in favor of acpi-cpufreq. Forcing the intel_pstate driver
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -37,6 +37,8 @@
 | 
			
		|||
#include <asm/cpufeature.h>
 | 
			
		||||
#include <asm/intel-family.h>
 | 
			
		||||
 | 
			
		||||
#define INTEL_CPUFREQ_TRANSITION_LATENCY	20000
 | 
			
		||||
 | 
			
		||||
#define ATOM_RATIOS		0x66a
 | 
			
		||||
#define ATOM_VIDS		0x66b
 | 
			
		||||
#define ATOM_TURBO_RATIOS	0x66c
 | 
			
		||||
| 
						 | 
				
			
			@ -122,6 +124,8 @@ struct sample {
 | 
			
		|||
 * @scaling:		Scaling factor to convert frequency to cpufreq
 | 
			
		||||
 *			frequency units
 | 
			
		||||
 * @turbo_pstate:	Max Turbo P state possible for this platform
 | 
			
		||||
 * @max_freq:		@max_pstate frequency in cpufreq units
 | 
			
		||||
 * @turbo_freq:		@turbo_pstate frequency in cpufreq units
 | 
			
		||||
 *
 | 
			
		||||
 * Stores the per cpu model P state limits and current P state.
 | 
			
		||||
 */
 | 
			
		||||
| 
						 | 
				
			
			@ -132,6 +136,8 @@ struct pstate_data {
 | 
			
		|||
	int	max_pstate_physical;
 | 
			
		||||
	int	scaling;
 | 
			
		||||
	int	turbo_pstate;
 | 
			
		||||
	unsigned int max_freq;
 | 
			
		||||
	unsigned int turbo_freq;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
| 
						 | 
				
			
			@ -470,7 +476,7 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 | 
			
		|||
{
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 | 
			
		||||
static inline int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 | 
			
		||||
{
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			@ -1225,6 +1231,8 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 | 
			
		|||
	cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
 | 
			
		||||
	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
 | 
			
		||||
	cpu->pstate.scaling = pstate_funcs.get_scaling();
 | 
			
		||||
	cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
 | 
			
		||||
	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
 | 
			
		||||
 | 
			
		||||
	if (pstate_funcs.get_vid)
 | 
			
		||||
		pstate_funcs.get_vid(cpu);
 | 
			
		||||
| 
						 | 
				
			
			@ -1363,15 +1371,19 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 | 
			
		|||
	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, perf_scaled);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
 | 
			
		||||
static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
 | 
			
		||||
{
 | 
			
		||||
	int max_perf, min_perf;
 | 
			
		||||
 | 
			
		||||
	update_turbo_state();
 | 
			
		||||
 | 
			
		||||
	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
 | 
			
		||||
	pstate = clamp_t(int, pstate, min_perf, max_perf);
 | 
			
		||||
	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
 | 
			
		||||
	return pstate;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
 | 
			
		||||
{
 | 
			
		||||
	pstate = intel_pstate_prepare_request(cpu, pstate);
 | 
			
		||||
	if (pstate == cpu->pstate.current_pstate)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1389,6 +1401,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 | 
			
		|||
	target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ?
 | 
			
		||||
		cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu);
 | 
			
		||||
 | 
			
		||||
	update_turbo_state();
 | 
			
		||||
 | 
			
		||||
	intel_pstate_update_pstate(cpu, target_pstate);
 | 
			
		||||
 | 
			
		||||
	sample = &cpu->sample;
 | 
			
		||||
| 
						 | 
				
			
			@ -1670,22 +1684,30 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
 | 
			
		|||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
 | 
			
		||||
static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
 | 
			
		||||
{
 | 
			
		||||
	int cpu_num = policy->cpu;
 | 
			
		||||
	struct cpudata *cpu = all_cpu_data[cpu_num];
 | 
			
		||||
 | 
			
		||||
	pr_debug("CPU %d exiting\n", cpu_num);
 | 
			
		||||
 | 
			
		||||
	intel_pstate_clear_update_util_hook(cpu_num);
 | 
			
		||||
 | 
			
		||||
	if (hwp_active)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	intel_pstate_set_min_pstate(cpu);
 | 
			
		||||
	intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
 | 
			
		||||
static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
 | 
			
		||||
{
 | 
			
		||||
	pr_debug("CPU %d exiting\n", policy->cpu);
 | 
			
		||||
 | 
			
		||||
	intel_pstate_clear_update_util_hook(policy->cpu);
 | 
			
		||||
	if (!hwp_active)
 | 
			
		||||
		intel_cpufreq_stop_cpu(policy);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
 | 
			
		||||
{
 | 
			
		||||
	intel_pstate_exit_perf_limits(policy);
 | 
			
		||||
 | 
			
		||||
	policy->fast_switch_possible = false;
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
 | 
			
		||||
{
 | 
			
		||||
	struct cpudata *cpu;
 | 
			
		||||
	int rc;
 | 
			
		||||
| 
						 | 
				
			
			@ -1696,11 +1718,6 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
 | 
			
		|||
 | 
			
		||||
	cpu = all_cpu_data[policy->cpu];
 | 
			
		||||
 | 
			
		||||
	if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
 | 
			
		||||
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
 | 
			
		||||
	else
 | 
			
		||||
		policy->policy = CPUFREQ_POLICY_POWERSAVE;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * We need sane values in cpu->perf_limits, so inherit from the global
 | 
			
		||||
	 * perf_limits limits, which are seeded with values based on the
 | 
			
		||||
| 
						 | 
				
			
			@ -1720,20 +1737,30 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
 | 
			
		|||
	policy->cpuinfo.max_freq *= cpu->pstate.scaling;
 | 
			
		||||
 | 
			
		||||
	intel_pstate_init_acpi_perf_limits(policy);
 | 
			
		||||
	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
 | 
			
		||||
	cpumask_set_cpu(policy->cpu, policy->cpus);
 | 
			
		||||
 | 
			
		||||
	policy->fast_switch_possible = true;
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
 | 
			
		||||
static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
 | 
			
		||||
{
 | 
			
		||||
	intel_pstate_exit_perf_limits(policy);
 | 
			
		||||
	int ret = __intel_pstate_cpu_init(policy);
 | 
			
		||||
 | 
			
		||||
	if (ret)
 | 
			
		||||
		return ret;
 | 
			
		||||
 | 
			
		||||
	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
 | 
			
		||||
	if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
 | 
			
		||||
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
 | 
			
		||||
	else
 | 
			
		||||
		policy->policy = CPUFREQ_POLICY_POWERSAVE;
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct cpufreq_driver intel_pstate_driver = {
 | 
			
		||||
static struct cpufreq_driver intel_pstate = {
 | 
			
		||||
	.flags		= CPUFREQ_CONST_LOOPS,
 | 
			
		||||
	.verify		= intel_pstate_verify_policy,
 | 
			
		||||
	.setpolicy	= intel_pstate_set_policy,
 | 
			
		||||
| 
						 | 
				
			
			@ -1745,6 +1772,118 @@ static struct cpufreq_driver intel_pstate_driver = {
 | 
			
		|||
	.name		= "intel_pstate",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
 | 
			
		||||
{
 | 
			
		||||
	struct cpudata *cpu = all_cpu_data[policy->cpu];
 | 
			
		||||
	struct perf_limits *perf_limits = limits;
 | 
			
		||||
 | 
			
		||||
	update_turbo_state();
 | 
			
		||||
	policy->cpuinfo.max_freq = limits->turbo_disabled ?
 | 
			
		||||
			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
 | 
			
		||||
 | 
			
		||||
	cpufreq_verify_within_cpu_limits(policy);
 | 
			
		||||
 | 
			
		||||
	if (per_cpu_limits)
 | 
			
		||||
		perf_limits = cpu->perf_limits;
 | 
			
		||||
 | 
			
		||||
	intel_pstate_update_perf_limits(policy, perf_limits);
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu,
 | 
			
		||||
					       struct cpufreq_policy *policy,
 | 
			
		||||
					       unsigned int target_freq)
 | 
			
		||||
{
 | 
			
		||||
	unsigned int max_freq;
 | 
			
		||||
 | 
			
		||||
	update_turbo_state();
 | 
			
		||||
 | 
			
		||||
	max_freq = limits->no_turbo || limits->turbo_disabled ?
 | 
			
		||||
			cpu->pstate.max_freq : cpu->pstate.turbo_freq;
 | 
			
		||||
	policy->cpuinfo.max_freq = max_freq;
 | 
			
		||||
	if (policy->max > max_freq)
 | 
			
		||||
		policy->max = max_freq;
 | 
			
		||||
 | 
			
		||||
	if (target_freq > max_freq)
 | 
			
		||||
		target_freq = max_freq;
 | 
			
		||||
 | 
			
		||||
	return target_freq;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int intel_cpufreq_target(struct cpufreq_policy *policy,
 | 
			
		||||
				unsigned int target_freq,
 | 
			
		||||
				unsigned int relation)
 | 
			
		||||
{
 | 
			
		||||
	struct cpudata *cpu = all_cpu_data[policy->cpu];
 | 
			
		||||
	struct cpufreq_freqs freqs;
 | 
			
		||||
	int target_pstate;
 | 
			
		||||
 | 
			
		||||
	freqs.old = policy->cur;
 | 
			
		||||
	freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq);
 | 
			
		||||
 | 
			
		||||
	cpufreq_freq_transition_begin(policy, &freqs);
 | 
			
		||||
	switch (relation) {
 | 
			
		||||
	case CPUFREQ_RELATION_L:
 | 
			
		||||
		target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling);
 | 
			
		||||
		break;
 | 
			
		||||
	case CPUFREQ_RELATION_H:
 | 
			
		||||
		target_pstate = freqs.new / cpu->pstate.scaling;
 | 
			
		||||
		break;
 | 
			
		||||
	default:
 | 
			
		||||
		target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling);
 | 
			
		||||
		break;
 | 
			
		||||
	}
 | 
			
		||||
	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
 | 
			
		||||
	if (target_pstate != cpu->pstate.current_pstate) {
 | 
			
		||||
		cpu->pstate.current_pstate = target_pstate;
 | 
			
		||||
		wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
 | 
			
		||||
			      pstate_funcs.get_val(cpu, target_pstate));
 | 
			
		||||
	}
 | 
			
		||||
	cpufreq_freq_transition_end(policy, &freqs, false);
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
 | 
			
		||||
					      unsigned int target_freq)
 | 
			
		||||
{
 | 
			
		||||
	struct cpudata *cpu = all_cpu_data[policy->cpu];
 | 
			
		||||
	int target_pstate;
 | 
			
		||||
 | 
			
		||||
	target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq);
 | 
			
		||||
	target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
 | 
			
		||||
	intel_pstate_update_pstate(cpu, target_pstate);
 | 
			
		||||
	return target_freq;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
 | 
			
		||||
{
 | 
			
		||||
	int ret = __intel_pstate_cpu_init(policy);
 | 
			
		||||
 | 
			
		||||
	if (ret)
 | 
			
		||||
		return ret;
 | 
			
		||||
 | 
			
		||||
	policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY;
 | 
			
		||||
	/* This reflects the intel_pstate_get_cpu_pstates() setting. */
 | 
			
		||||
	policy->cur = policy->cpuinfo.min_freq;
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct cpufreq_driver intel_cpufreq = {
 | 
			
		||||
	.flags		= CPUFREQ_CONST_LOOPS,
 | 
			
		||||
	.verify		= intel_cpufreq_verify_policy,
 | 
			
		||||
	.target		= intel_cpufreq_target,
 | 
			
		||||
	.fast_switch	= intel_cpufreq_fast_switch,
 | 
			
		||||
	.init		= intel_cpufreq_cpu_init,
 | 
			
		||||
	.exit		= intel_pstate_cpu_exit,
 | 
			
		||||
	.stop_cpu	= intel_cpufreq_stop_cpu,
 | 
			
		||||
	.name		= "intel_cpufreq",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static struct cpufreq_driver *intel_pstate_driver = &intel_pstate;
 | 
			
		||||
 | 
			
		||||
static int no_load __initdata;
 | 
			
		||||
static int no_hwp __initdata;
 | 
			
		||||
static int hwp_only __initdata;
 | 
			
		||||
| 
						 | 
				
			
			@ -1976,7 +2115,7 @@ static int __init intel_pstate_init(void)
 | 
			
		|||
 | 
			
		||||
	intel_pstate_request_control_from_smm();
 | 
			
		||||
 | 
			
		||||
	rc = cpufreq_register_driver(&intel_pstate_driver);
 | 
			
		||||
	rc = cpufreq_register_driver(intel_pstate_driver);
 | 
			
		||||
	if (rc)
 | 
			
		||||
		goto out;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1991,7 +2130,9 @@ static int __init intel_pstate_init(void)
 | 
			
		|||
	get_online_cpus();
 | 
			
		||||
	for_each_online_cpu(cpu) {
 | 
			
		||||
		if (all_cpu_data[cpu]) {
 | 
			
		||||
			intel_pstate_clear_update_util_hook(cpu);
 | 
			
		||||
			if (intel_pstate_driver == &intel_pstate)
 | 
			
		||||
				intel_pstate_clear_update_util_hook(cpu);
 | 
			
		||||
 | 
			
		||||
			kfree(all_cpu_data[cpu]);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -2007,8 +2148,13 @@ static int __init intel_pstate_setup(char *str)
 | 
			
		|||
	if (!str)
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
 | 
			
		||||
	if (!strcmp(str, "disable"))
 | 
			
		||||
	if (!strcmp(str, "disable")) {
 | 
			
		||||
		no_load = 1;
 | 
			
		||||
	} else if (!strcmp(str, "passive")) {
 | 
			
		||||
		pr_info("Passive mode enabled\n");
 | 
			
		||||
		intel_pstate_driver = &intel_cpufreq;
 | 
			
		||||
		no_hwp = 1;
 | 
			
		||||
	}
 | 
			
		||||
	if (!strcmp(str, "no_hwp")) {
 | 
			
		||||
		pr_info("HWP disabled\n");
 | 
			
		||||
		no_hwp = 1;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue