forked from mirrors/linux
		
	cpufreq: intel_pstate: Enforce _PPC limits
Use ACPI _PPC notification to limit the max P state the driver will request. ACPI _PPC change notification is sent by BIOS to limit the max P state in several cases: - Reduce impact of platform thermal condition - When Config TDP feature is used, a changed _PPC is sent to follow TDP change - Remote node managers in server want to control platform power via baseboard management controller (BMC) This change registers with the ACPI processor performance lib so that _PPC changes are notified to the cpufreq core, which in turn will result in a call to the .setpolicy() callback. Also, the way the _PSS table identifies a turbo frequency is not compatible with the max turbo frequency in intel_pstate, so the very first entry in _PSS needs to be adjusted. This feature can be turned on by using the kernel parameter: intel_pstate=support_acpi_ppc Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> [ rjw: Minor cleanups ] Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
This commit is contained in:
		
							parent
							
								
									eaa2c3aeef
								
							
						
					
					
						commit
						9522a2ff9c
					
				
					 3 changed files with 137 additions and 2 deletions
				
			
		| 
						 | 
					@ -1661,6 +1661,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 | 
				
			||||||
		hwp_only
 | 
							hwp_only
 | 
				
			||||||
			Only load intel_pstate on systems which support
 | 
								Only load intel_pstate on systems which support
 | 
				
			||||||
			hardware P state control (HWP) if available.
 | 
								hardware P state control (HWP) if available.
 | 
				
			||||||
 | 
							support_acpi_ppc
 | 
				
			||||||
 | 
								Enforce ACPI _PPC performance limits.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	intremap=	[X86-64, Intel-IOMMU]
 | 
						intremap=	[X86-64, Intel-IOMMU]
 | 
				
			||||||
			on	enable Interrupt Remapping (default)
 | 
								on	enable Interrupt Remapping (default)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5,6 +5,7 @@
 | 
				
			||||||
config X86_INTEL_PSTATE
 | 
					config X86_INTEL_PSTATE
 | 
				
			||||||
       bool "Intel P state control"
 | 
					       bool "Intel P state control"
 | 
				
			||||||
       depends on X86
 | 
					       depends on X86
 | 
				
			||||||
 | 
					       select ACPI_PROCESSOR if ACPI
 | 
				
			||||||
       help
 | 
					       help
 | 
				
			||||||
          This driver provides a P state for Intel core processors.
 | 
					          This driver provides a P state for Intel core processors.
 | 
				
			||||||
	  The driver implements an internal governor and will become
 | 
						  The driver implements an internal governor and will become
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -41,6 +41,10 @@
 | 
				
			||||||
#define ATOM_TURBO_RATIOS	0x66c
 | 
					#define ATOM_TURBO_RATIOS	0x66c
 | 
				
			||||||
#define ATOM_TURBO_VIDS		0x66d
 | 
					#define ATOM_TURBO_VIDS		0x66d
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_ACPI
 | 
				
			||||||
 | 
					#include <acpi/processor.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define FRAC_BITS 8
 | 
					#define FRAC_BITS 8
 | 
				
			||||||
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
 | 
					#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
 | 
				
			||||||
#define fp_toint(X) ((X) >> FRAC_BITS)
 | 
					#define fp_toint(X) ((X) >> FRAC_BITS)
 | 
				
			||||||
| 
						 | 
					@ -174,6 +178,8 @@ struct _pid {
 | 
				
			||||||
 * @prev_cummulative_iowait: IO Wait time difference from last and
 | 
					 * @prev_cummulative_iowait: IO Wait time difference from last and
 | 
				
			||||||
 *			current sample
 | 
					 *			current sample
 | 
				
			||||||
 * @sample:		Storage for storing last Sample data
 | 
					 * @sample:		Storage for storing last Sample data
 | 
				
			||||||
 | 
					 * @acpi_perf_data:	Stores ACPI perf information read from _PSS
 | 
				
			||||||
 | 
					 * @valid_pss_table:	Set to true for valid ACPI _PSS entries found
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * This structure stores per CPU instance data for all CPUs.
 | 
					 * This structure stores per CPU instance data for all CPUs.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
| 
						 | 
					@ -192,6 +198,10 @@ struct cpudata {
 | 
				
			||||||
	u64	prev_tsc;
 | 
						u64	prev_tsc;
 | 
				
			||||||
	u64	prev_cummulative_iowait;
 | 
						u64	prev_cummulative_iowait;
 | 
				
			||||||
	struct sample sample;
 | 
						struct sample sample;
 | 
				
			||||||
 | 
					#ifdef CONFIG_ACPI
 | 
				
			||||||
 | 
						struct acpi_processor_performance acpi_perf_data;
 | 
				
			||||||
 | 
						bool valid_pss_table;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct cpudata **all_cpu_data;
 | 
					static struct cpudata **all_cpu_data;
 | 
				
			||||||
| 
						 | 
					@ -260,6 +270,9 @@ static struct pstate_adjust_policy pid_params;
 | 
				
			||||||
static struct pstate_funcs pstate_funcs;
 | 
					static struct pstate_funcs pstate_funcs;
 | 
				
			||||||
static int hwp_active;
 | 
					static int hwp_active;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_ACPI
 | 
				
			||||||
 | 
					static bool acpi_ppc;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * struct perf_limits - Store user and policy limits
 | 
					 * struct perf_limits - Store user and policy limits
 | 
				
			||||||
| 
						 | 
					@ -333,6 +346,111 @@ static struct perf_limits *limits = &performance_limits;
 | 
				
			||||||
static struct perf_limits *limits = &powersave_limits;
 | 
					static struct perf_limits *limits = &powersave_limits;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_ACPI
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * The max target pstate ratio is an 8 bit value in both PLATFORM_INFO MSR and
 | 
				
			||||||
 | 
					 * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and
 | 
				
			||||||
 | 
					 * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state
 | 
				
			||||||
 | 
					 * ratio, out of it only high 8 bits are used. For example 0x1700 is setting
 | 
				
			||||||
 | 
					 * target ratio 0x17. The _PSS control value stores in a format which can be
 | 
				
			||||||
 | 
					 * directly written to PERF_CTL MSR. But in intel_pstate driver this shift
 | 
				
			||||||
 | 
					 * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()).
 | 
				
			||||||
 | 
					 * This function converts the _PSS control value to intel pstate driver format
 | 
				
			||||||
 | 
					 * for comparison and assignment.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static int convert_to_native_pstate_format(struct cpudata *cpu, int index)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return cpu->acpi_perf_data.states[index].control >> 8;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct cpudata *cpu;
 | 
				
			||||||
 | 
						int turbo_pss_ctl;
 | 
				
			||||||
 | 
						int ret;
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!acpi_ppc)
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						cpu = all_cpu_data[policy->cpu];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
 | 
				
			||||||
 | 
											  policy->cpu);
 | 
				
			||||||
 | 
						if (ret)
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Check if the control value in _PSS is for PERF_CTL MSR, which should
 | 
				
			||||||
 | 
						 * guarantee that the states returned by it map to the states in our
 | 
				
			||||||
 | 
						 * list directly.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (cpu->acpi_perf_data.control_register.space_id !=
 | 
				
			||||||
 | 
											ACPI_ADR_SPACE_FIXED_HARDWARE)
 | 
				
			||||||
 | 
							goto err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * If there is only one entry in _PSS, simply ignore _PSS and continue as
 | 
				
			||||||
 | 
						 * usual without taking _PSS into account
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (cpu->acpi_perf_data.state_count < 2)
 | 
				
			||||||
 | 
							goto err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu);
 | 
				
			||||||
 | 
						for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
 | 
				
			||||||
 | 
							pr_debug("     %cP%d: %u MHz, %u mW, 0x%x\n",
 | 
				
			||||||
 | 
								 (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
 | 
				
			||||||
 | 
								 (u32) cpu->acpi_perf_data.states[i].core_frequency,
 | 
				
			||||||
 | 
								 (u32) cpu->acpi_perf_data.states[i].power,
 | 
				
			||||||
 | 
								 (u32) cpu->acpi_perf_data.states[i].control);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * The _PSS table doesn't contain whole turbo frequency range.
 | 
				
			||||||
 | 
						 * This just contains +1 MHZ above the max non turbo frequency,
 | 
				
			||||||
 | 
						 * with control value corresponding to max turbo ratio. But
 | 
				
			||||||
 | 
						 * when cpufreq set policy is called, it will call with this
 | 
				
			||||||
 | 
						 * max frequency, which will cause a reduced performance as
 | 
				
			||||||
 | 
						 * this driver uses real max turbo frequency as the max
 | 
				
			||||||
 | 
						 * frequency. So correct this frequency in _PSS table to
 | 
				
			||||||
 | 
						 * correct max turbo frequency based on the turbo ratio.
 | 
				
			||||||
 | 
						 * Also need to convert to MHz as _PSS freq is in MHz.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0);
 | 
				
			||||||
 | 
						if (turbo_pss_ctl > cpu->pstate.max_pstate)
 | 
				
			||||||
 | 
							cpu->acpi_perf_data.states[0].core_frequency =
 | 
				
			||||||
 | 
										policy->cpuinfo.max_freq / 1000;
 | 
				
			||||||
 | 
						cpu->valid_pss_table = true;
 | 
				
			||||||
 | 
						pr_info("_PPC limits will be enforced\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 err:
 | 
				
			||||||
 | 
						cpu->valid_pss_table = false;
 | 
				
			||||||
 | 
						acpi_processor_unregister_performance(policy->cpu);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct cpudata *cpu;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						cpu = all_cpu_data[policy->cpu];
 | 
				
			||||||
 | 
						if (!cpu->valid_pss_table)
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						acpi_processor_unregister_performance(policy->cpu);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
 | 
					static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
 | 
				
			||||||
			     int deadband, int integral) {
 | 
								     int deadband, int integral) {
 | 
				
			||||||
	pid->setpoint = int_tofp(setpoint);
 | 
						pid->setpoint = int_tofp(setpoint);
 | 
				
			||||||
| 
						 | 
					@ -1398,18 +1516,27 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
 | 
				
			||||||
	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
 | 
						policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
 | 
				
			||||||
	policy->cpuinfo.max_freq =
 | 
						policy->cpuinfo.max_freq =
 | 
				
			||||||
		cpu->pstate.turbo_pstate * cpu->pstate.scaling;
 | 
							cpu->pstate.turbo_pstate * cpu->pstate.scaling;
 | 
				
			||||||
 | 
						intel_pstate_init_acpi_perf_limits(policy);
 | 
				
			||||||
	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
 | 
						policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
 | 
				
			||||||
	cpumask_set_cpu(policy->cpu, policy->cpus);
 | 
						cpumask_set_cpu(policy->cpu, policy->cpus);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						intel_pstate_exit_perf_limits(policy);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct cpufreq_driver intel_pstate_driver = {
 | 
					static struct cpufreq_driver intel_pstate_driver = {
 | 
				
			||||||
	.flags		= CPUFREQ_CONST_LOOPS,
 | 
						.flags		= CPUFREQ_CONST_LOOPS,
 | 
				
			||||||
	.verify		= intel_pstate_verify_policy,
 | 
						.verify		= intel_pstate_verify_policy,
 | 
				
			||||||
	.setpolicy	= intel_pstate_set_policy,
 | 
						.setpolicy	= intel_pstate_set_policy,
 | 
				
			||||||
	.get		= intel_pstate_get,
 | 
						.get		= intel_pstate_get,
 | 
				
			||||||
	.init		= intel_pstate_cpu_init,
 | 
						.init		= intel_pstate_cpu_init,
 | 
				
			||||||
 | 
						.exit		= intel_pstate_cpu_exit,
 | 
				
			||||||
	.stop_cpu	= intel_pstate_stop_cpu,
 | 
						.stop_cpu	= intel_pstate_stop_cpu,
 | 
				
			||||||
	.name		= "intel_pstate",
 | 
						.name		= "intel_pstate",
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
| 
						 | 
					@ -1453,8 +1580,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if IS_ENABLED(CONFIG_ACPI)
 | 
					#ifdef CONFIG_ACPI
 | 
				
			||||||
#include <acpi/processor.h>
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
static bool intel_pstate_no_acpi_pss(void)
 | 
					static bool intel_pstate_no_acpi_pss(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -1660,6 +1786,12 @@ static int __init intel_pstate_setup(char *str)
 | 
				
			||||||
		force_load = 1;
 | 
							force_load = 1;
 | 
				
			||||||
	if (!strcmp(str, "hwp_only"))
 | 
						if (!strcmp(str, "hwp_only"))
 | 
				
			||||||
		hwp_only = 1;
 | 
							hwp_only = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_ACPI
 | 
				
			||||||
 | 
						if (!strcmp(str, "support_acpi_ppc"))
 | 
				
			||||||
 | 
							acpi_ppc = true;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
early_param("intel_pstate", intel_pstate_setup);
 | 
					early_param("intel_pstate", intel_pstate_setup);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue