forked from mirrors/linux
		
	intel_pstate: Change busy calculation to use fixed point math.
Commit fcb6a15c2e (intel_pstate: Take core C0 time into account for core busy calculation) introduced a regression on some processor SKUs supported by intel_pstate. This was due to the truncation caused by using integer math to calculate core busy and C0 percentages. On an i7-4770K processor operating at 800 MHz going to 100% utilization, the percent busy of the CPU using integer math is 22%, but it actually is 22.85%. This value scaled to the current frequency returned 97, which the PID interpreted as no error and did not adjust the P state. Tested on i7-4770K, i7-2600, i5-3230M. Fixes: fcb6a15c2e (intel_pstate: Take core C0 time into account for core busy calculation) References: https://lkml.org/lkml/2014/2/19/626 References: https://bugzilla.kernel.org/show_bug.cgi?id=70941 Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
This commit is contained in:
		
							parent
							
								
									cfbf8d4857
								
							
						
					
					
						commit
						e66c176837
					
				
					 1 changed files with 18 additions and 10 deletions
				
			
		| 
						 | 
					@ -39,9 +39,10 @@
 | 
				
			||||||
#define BYT_TURBO_RATIOS	0x66c
 | 
					#define BYT_TURBO_RATIOS	0x66c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define FRAC_BITS 8
 | 
					#define FRAC_BITS 6
 | 
				
			||||||
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
 | 
					#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
 | 
				
			||||||
#define fp_toint(X) ((X) >> FRAC_BITS)
 | 
					#define fp_toint(X) ((X) >> FRAC_BITS)
 | 
				
			||||||
 | 
					#define FP_ROUNDUP(X) ((X) += 1 << FRAC_BITS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline int32_t mul_fp(int32_t x, int32_t y)
 | 
					static inline int32_t mul_fp(int32_t x, int32_t y)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -556,18 +557,20 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 | 
				
			||||||
static inline void intel_pstate_calc_busy(struct cpudata *cpu,
 | 
					static inline void intel_pstate_calc_busy(struct cpudata *cpu,
 | 
				
			||||||
					struct sample *sample)
 | 
										struct sample *sample)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	u64 core_pct;
 | 
						int32_t core_pct;
 | 
				
			||||||
	u64 c0_pct;
 | 
						int32_t c0_pct;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	core_pct = div64_u64(sample->aperf * 100, sample->mperf);
 | 
						core_pct = div_fp(int_tofp((sample->aperf)),
 | 
				
			||||||
 | 
								int_tofp((sample->mperf)));
 | 
				
			||||||
 | 
						core_pct = mul_fp(core_pct, int_tofp(100));
 | 
				
			||||||
 | 
						FP_ROUNDUP(core_pct);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						c0_pct = div_fp(int_tofp(sample->mperf), int_tofp(sample->tsc));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	c0_pct = div64_u64(sample->mperf * 100, sample->tsc);
 | 
					 | 
				
			||||||
	sample->freq = fp_toint(
 | 
						sample->freq = fp_toint(
 | 
				
			||||||
		mul_fp(int_tofp(cpu->pstate.max_pstate),
 | 
							mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct));
 | 
				
			||||||
			int_tofp(core_pct * 1000)));
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	sample->core_pct_busy = mul_fp(int_tofp(core_pct),
 | 
						sample->core_pct_busy = mul_fp(core_pct, c0_pct);
 | 
				
			||||||
				div_fp(int_tofp(c0_pct + 1), int_tofp(100)));
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void intel_pstate_sample(struct cpudata *cpu)
 | 
					static inline void intel_pstate_sample(struct cpudata *cpu)
 | 
				
			||||||
| 
						 | 
					@ -579,6 +582,10 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
 | 
				
			||||||
	rdmsrl(MSR_IA32_MPERF, mperf);
 | 
						rdmsrl(MSR_IA32_MPERF, mperf);
 | 
				
			||||||
	tsc = native_read_tsc();
 | 
						tsc = native_read_tsc();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						aperf = aperf >> FRAC_BITS;
 | 
				
			||||||
 | 
						mperf = mperf >> FRAC_BITS;
 | 
				
			||||||
 | 
						tsc = tsc >> FRAC_BITS;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
 | 
						cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
 | 
				
			||||||
	cpu->samples[cpu->sample_ptr].aperf = aperf;
 | 
						cpu->samples[cpu->sample_ptr].aperf = aperf;
 | 
				
			||||||
	cpu->samples[cpu->sample_ptr].mperf = mperf;
 | 
						cpu->samples[cpu->sample_ptr].mperf = mperf;
 | 
				
			||||||
| 
						 | 
					@ -610,7 +617,8 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
 | 
				
			||||||
	core_busy = cpu->samples[cpu->sample_ptr].core_pct_busy;
 | 
						core_busy = cpu->samples[cpu->sample_ptr].core_pct_busy;
 | 
				
			||||||
	max_pstate = int_tofp(cpu->pstate.max_pstate);
 | 
						max_pstate = int_tofp(cpu->pstate.max_pstate);
 | 
				
			||||||
	current_pstate = int_tofp(cpu->pstate.current_pstate);
 | 
						current_pstate = int_tofp(cpu->pstate.current_pstate);
 | 
				
			||||||
	return mul_fp(core_busy, div_fp(max_pstate, current_pstate));
 | 
						core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
 | 
				
			||||||
 | 
						return FP_ROUNDUP(core_busy);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 | 
					static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue