mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	[PATCH] sched: mc/smt power savings sched policy
The sysfs entries 'sched_mc_power_savings' and 'sched_smt_power_savings' in /sys/devices/system/cpu/ control the MC/SMT power savings policy for the scheduler. Based on the values (1 = enable, 0 = disable) of these controls, the cpu power of the sched groups is determined for the different domains. When the power savings policy is enabled and the system is under light load, the scheduler minimizes the number of physical packages/cpu cores carrying the load, thus conserving power (with a performance impact that depends on the workload characteristics; see the OLS 2005 CMP kernel scheduler paper for more details). Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Con Kolivas <kernel@kolivas.org> Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com> Cc: "David S. Miller" <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
		
							parent
							
								
									369381694d
								
							
						
					
					
						commit
						5c45bf279d
					
				
					 11 changed files with 264 additions and 35 deletions
				
			
		| 
						 | 
					@ -448,9 +448,11 @@ cpumask_t cpu_coregroup_map(int cpu)
 | 
				
			||||||
	struct cpuinfo_x86 *c = cpu_data + cpu;
 | 
						struct cpuinfo_x86 *c = cpu_data + cpu;
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * For perf, we return last level cache shared map.
 | 
						 * For perf, we return last level cache shared map.
 | 
				
			||||||
	 * TBD: when power saving sched policy is added, we will return
 | 
						 * And for power savings, we return cpu_core_map
 | 
				
			||||||
	 *      cpu_core_map when power saving policy is enabled
 | 
					 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
 | 
						if (sched_mc_power_savings || sched_smt_power_savings)
 | 
				
			||||||
 | 
							return cpu_core_map[cpu];
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
		return c->llc_shared_map;
 | 
							return c->llc_shared_map;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -455,9 +455,11 @@ cpumask_t cpu_coregroup_map(int cpu)
 | 
				
			||||||
	struct cpuinfo_x86 *c = cpu_data + cpu;
 | 
						struct cpuinfo_x86 *c = cpu_data + cpu;
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * For perf, we return last level cache shared map.
 | 
						 * For perf, we return last level cache shared map.
 | 
				
			||||||
	 * TBD: when power saving sched policy is added, we will return
 | 
						 * And for power savings, we return cpu_core_map
 | 
				
			||||||
	 *      cpu_core_map when power saving policy is enabled
 | 
					 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
 | 
						if (sched_mc_power_savings || sched_smt_power_savings)
 | 
				
			||||||
 | 
							return cpu_core_map[cpu];
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
		return c->llc_shared_map;
 | 
							return c->llc_shared_map;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -143,5 +143,13 @@ EXPORT_SYMBOL_GPL(get_cpu_sysdev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int __init cpu_dev_init(void)
 | 
					int __init cpu_dev_init(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return sysdev_class_register(&cpu_sysdev_class);
 | 
						int err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						err = sysdev_class_register(&cpu_sysdev_class);
 | 
				
			||||||
 | 
					#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 | 
				
			||||||
 | 
						if (!err)
 | 
				
			||||||
 | 
							err = sched_create_sysfs_power_savings_entries(&cpu_sysdev_class);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return err;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -112,4 +112,9 @@ extern unsigned long node_remap_size[];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern cpumask_t cpu_coregroup_map(int cpu);
 | 
					extern cpumask_t cpu_coregroup_map(int cpu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_SMP
 | 
				
			||||||
 | 
					#define mc_capable()	(boot_cpu_data.x86_max_cores > 1)
 | 
				
			||||||
 | 
					#define smt_capable()	(smp_num_siblings > 1)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif /* _ASM_I386_TOPOLOGY_H */
 | 
					#endif /* _ASM_I386_TOPOLOGY_H */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -112,6 +112,7 @@ void build_cpu_to_node_map(void);
 | 
				
			||||||
#define topology_core_id(cpu)			(cpu_data(cpu)->core_id)
 | 
					#define topology_core_id(cpu)			(cpu_data(cpu)->core_id)
 | 
				
			||||||
#define topology_core_siblings(cpu)		(cpu_core_map[cpu])
 | 
					#define topology_core_siblings(cpu)		(cpu_core_map[cpu])
 | 
				
			||||||
#define topology_thread_siblings(cpu)		(cpu_sibling_map[cpu])
 | 
					#define topology_thread_siblings(cpu)		(cpu_sibling_map[cpu])
 | 
				
			||||||
 | 
					#define smt_capable() 				(smp_num_siblings > 1)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <asm-generic/topology.h>
 | 
					#include <asm-generic/topology.h>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -93,5 +93,10 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif /* CONFIG_NUMA */
 | 
					#endif /* CONFIG_NUMA */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_SMP
 | 
				
			||||||
 | 
					#include <asm/cputable.h>
 | 
				
			||||||
 | 
					#define smt_capable() 		(cpu_has_feature(CPU_FTR_SMT))
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif /* __KERNEL__ */
 | 
					#endif /* __KERNEL__ */
 | 
				
			||||||
#endif	/* _ASM_POWERPC_TOPOLOGY_H */
 | 
					#endif	/* _ASM_POWERPC_TOPOLOGY_H */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,6 +1,9 @@
 | 
				
			||||||
#ifndef _ASM_SPARC64_TOPOLOGY_H
 | 
					#ifndef _ASM_SPARC64_TOPOLOGY_H
 | 
				
			||||||
#define _ASM_SPARC64_TOPOLOGY_H
 | 
					#define _ASM_SPARC64_TOPOLOGY_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <asm/spitfire.h>
 | 
				
			||||||
 | 
					#define smt_capable()	(tlb_type == hypervisor)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <asm-generic/topology.h>
 | 
					#include <asm-generic/topology.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif /* _ASM_SPARC64_TOPOLOGY_H */
 | 
					#endif /* _ASM_SPARC64_TOPOLOGY_H */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -59,6 +59,8 @@ extern int __node_distance(int, int);
 | 
				
			||||||
#define topology_core_id(cpu)			(cpu_data[cpu].cpu_core_id)
 | 
					#define topology_core_id(cpu)			(cpu_data[cpu].cpu_core_id)
 | 
				
			||||||
#define topology_core_siblings(cpu)		(cpu_core_map[cpu])
 | 
					#define topology_core_siblings(cpu)		(cpu_core_map[cpu])
 | 
				
			||||||
#define topology_thread_siblings(cpu)		(cpu_sibling_map[cpu])
 | 
					#define topology_thread_siblings(cpu)		(cpu_sibling_map[cpu])
 | 
				
			||||||
 | 
					#define mc_capable()			(boot_cpu_data.x86_max_cores > 1)
 | 
				
			||||||
 | 
					#define smt_capable() 			(smp_num_siblings > 1)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <asm-generic/topology.h>
 | 
					#include <asm-generic/topology.h>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -570,6 +570,11 @@ enum idle_type
 | 
				
			||||||
#define SD_WAKE_AFFINE		32	/* Wake task to waking CPU */
 | 
					#define SD_WAKE_AFFINE		32	/* Wake task to waking CPU */
 | 
				
			||||||
#define SD_WAKE_BALANCE		64	/* Perform balancing at task wakeup */
 | 
					#define SD_WAKE_BALANCE		64	/* Perform balancing at task wakeup */
 | 
				
			||||||
#define SD_SHARE_CPUPOWER	128	/* Domain members share cpu power */
 | 
					#define SD_SHARE_CPUPOWER	128	/* Domain members share cpu power */
 | 
				
			||||||
 | 
					#define SD_POWERSAVINGS_BALANCE	256	/* Balance for power savings */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define BALANCE_FOR_POWER	((sched_mc_power_savings || sched_smt_power_savings) \
 | 
				
			||||||
 | 
									 ? SD_POWERSAVINGS_BALANCE : 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct sched_group {
 | 
					struct sched_group {
 | 
				
			||||||
	struct sched_group *next;	/* Must be a circular list */
 | 
						struct sched_group *next;	/* Must be a circular list */
 | 
				
			||||||
| 
						 | 
					@ -1412,6 +1417,11 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm)
 | 
				
			||||||
extern long sched_setaffinity(pid_t pid, cpumask_t new_mask);
 | 
					extern long sched_setaffinity(pid_t pid, cpumask_t new_mask);
 | 
				
			||||||
extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
 | 
					extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <linux/sysdev.h>
 | 
				
			||||||
 | 
					extern int sched_mc_power_savings, sched_smt_power_savings;
 | 
				
			||||||
 | 
					extern struct sysdev_attribute attr_sched_mc_power_savings, attr_sched_smt_power_savings;
 | 
				
			||||||
 | 
					extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern void normalize_rt_tasks(void);
 | 
					extern void normalize_rt_tasks(void);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_PM
 | 
					#ifdef CONFIG_PM
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -134,7 +134,8 @@
 | 
				
			||||||
	.flags			= SD_LOAD_BALANCE	\
 | 
						.flags			= SD_LOAD_BALANCE	\
 | 
				
			||||||
				| SD_BALANCE_NEWIDLE	\
 | 
									| SD_BALANCE_NEWIDLE	\
 | 
				
			||||||
				| SD_BALANCE_EXEC	\
 | 
									| SD_BALANCE_EXEC	\
 | 
				
			||||||
				| SD_WAKE_AFFINE,	\
 | 
									| SD_WAKE_AFFINE	\
 | 
				
			||||||
 | 
									| BALANCE_FOR_POWER,	\
 | 
				
			||||||
	.last_balance		= jiffies,		\
 | 
						.last_balance		= jiffies,		\
 | 
				
			||||||
	.balance_interval	= 1,			\
 | 
						.balance_interval	= 1,			\
 | 
				
			||||||
	.nr_balance_failed	= 0,			\
 | 
						.nr_balance_failed	= 0,			\
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										220
									
								
								kernel/sched.c
									
									
									
									
									
								
							
							
						
						
									
										220
									
								
								kernel/sched.c
									
									
									
									
									
								
							| 
						 | 
					@ -1162,6 +1162,11 @@ static int sched_balance_self(int cpu, int flag)
 | 
				
			||||||
	struct sched_domain *tmp, *sd = NULL;
 | 
						struct sched_domain *tmp, *sd = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for_each_domain(cpu, tmp) {
 | 
						for_each_domain(cpu, tmp) {
 | 
				
			||||||
 | 
					 		/*
 | 
				
			||||||
 | 
					 	 	 * If power savings logic is enabled for a domain, stop there.
 | 
				
			||||||
 | 
					 	 	 */
 | 
				
			||||||
 | 
							if (tmp->flags & SD_POWERSAVINGS_BALANCE)
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
		if (tmp->flags & flag)
 | 
							if (tmp->flags & flag)
 | 
				
			||||||
			sd = tmp;
 | 
								sd = tmp;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -2082,6 +2087,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 | 
				
			||||||
	unsigned long busiest_load_per_task, busiest_nr_running;
 | 
						unsigned long busiest_load_per_task, busiest_nr_running;
 | 
				
			||||||
	unsigned long this_load_per_task, this_nr_running;
 | 
						unsigned long this_load_per_task, this_nr_running;
 | 
				
			||||||
	int load_idx;
 | 
						int load_idx;
 | 
				
			||||||
 | 
					#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 | 
				
			||||||
 | 
						int power_savings_balance = 1;
 | 
				
			||||||
 | 
						unsigned long leader_nr_running = 0, min_load_per_task = 0;
 | 
				
			||||||
 | 
						unsigned long min_nr_running = ULONG_MAX;
 | 
				
			||||||
 | 
						struct sched_group *group_min = NULL, *group_leader = NULL;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	max_load = this_load = total_load = total_pwr = 0;
 | 
						max_load = this_load = total_load = total_pwr = 0;
 | 
				
			||||||
	busiest_load_per_task = busiest_nr_running = 0;
 | 
						busiest_load_per_task = busiest_nr_running = 0;
 | 
				
			||||||
| 
						 | 
					@ -2094,7 +2105,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 | 
				
			||||||
		load_idx = sd->idle_idx;
 | 
							load_idx = sd->idle_idx;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	do {
 | 
						do {
 | 
				
			||||||
		unsigned long load;
 | 
							unsigned long load, group_capacity;
 | 
				
			||||||
		int local_group;
 | 
							int local_group;
 | 
				
			||||||
		int i;
 | 
							int i;
 | 
				
			||||||
		unsigned long sum_nr_running, sum_weighted_load;
 | 
							unsigned long sum_nr_running, sum_weighted_load;
 | 
				
			||||||
| 
						 | 
					@ -2127,18 +2138,76 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 | 
				
			||||||
		/* Adjust by relative CPU power of the group */
 | 
							/* Adjust by relative CPU power of the group */
 | 
				
			||||||
		avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
 | 
							avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							group_capacity = group->cpu_power / SCHED_LOAD_SCALE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (local_group) {
 | 
							if (local_group) {
 | 
				
			||||||
			this_load = avg_load;
 | 
								this_load = avg_load;
 | 
				
			||||||
			this = group;
 | 
								this = group;
 | 
				
			||||||
			this_nr_running = sum_nr_running;
 | 
								this_nr_running = sum_nr_running;
 | 
				
			||||||
			this_load_per_task = sum_weighted_load;
 | 
								this_load_per_task = sum_weighted_load;
 | 
				
			||||||
		} else if (avg_load > max_load &&
 | 
							} else if (avg_load > max_load &&
 | 
				
			||||||
			   sum_nr_running > group->cpu_power / SCHED_LOAD_SCALE) {
 | 
								   sum_nr_running > group_capacity) {
 | 
				
			||||||
			max_load = avg_load;
 | 
								max_load = avg_load;
 | 
				
			||||||
			busiest = group;
 | 
								busiest = group;
 | 
				
			||||||
			busiest_nr_running = sum_nr_running;
 | 
								busiest_nr_running = sum_nr_running;
 | 
				
			||||||
			busiest_load_per_task = sum_weighted_load;
 | 
								busiest_load_per_task = sum_weighted_load;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * Busy processors will not participate in power savings
 | 
				
			||||||
 | 
							 * balance.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
					 		if (idle == NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
 | 
				
			||||||
 | 
					 			goto group_next;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * If the local group is idle or completely loaded
 | 
				
			||||||
 | 
							 * no need to do power savings balance at this domain
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							if (local_group && (this_nr_running >= group_capacity ||
 | 
				
			||||||
 | 
									    !this_nr_running))
 | 
				
			||||||
 | 
								power_savings_balance = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 		/*
 | 
				
			||||||
 | 
							 * If a group is already running at full capacity or idle,
 | 
				
			||||||
 | 
							 * don't include that group in power savings calculations
 | 
				
			||||||
 | 
					 		 */
 | 
				
			||||||
 | 
					 		if (!power_savings_balance || sum_nr_running >= group_capacity
 | 
				
			||||||
 | 
							    || !sum_nr_running)
 | 
				
			||||||
 | 
					 			goto group_next;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 		/*
 | 
				
			||||||
 | 
							 * Calculate the group which has the least non-idle load.
 | 
				
			||||||
 | 
					 		 * This is the group from where we need to pick up the load
 | 
				
			||||||
 | 
					 		 * for saving power
 | 
				
			||||||
 | 
					 		 */
 | 
				
			||||||
 | 
					 		if ((sum_nr_running < min_nr_running) ||
 | 
				
			||||||
 | 
					 		    (sum_nr_running == min_nr_running &&
 | 
				
			||||||
 | 
							     first_cpu(group->cpumask) <
 | 
				
			||||||
 | 
							     first_cpu(group_min->cpumask))) {
 | 
				
			||||||
 | 
					 			group_min = group;
 | 
				
			||||||
 | 
					 			min_nr_running = sum_nr_running;
 | 
				
			||||||
 | 
								min_load_per_task = sum_weighted_load /
 | 
				
			||||||
 | 
											sum_nr_running;
 | 
				
			||||||
 | 
					 		}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 		/*
 | 
				
			||||||
 | 
							 * Calculate the group which is almost near its
 | 
				
			||||||
 | 
					 		 * capacity but still has some space to pick up some load
 | 
				
			||||||
 | 
					 		 * from other group and save more power
 | 
				
			||||||
 | 
					 		 */
 | 
				
			||||||
 | 
					 		if (sum_nr_running <= group_capacity - 1)
 | 
				
			||||||
 | 
					 			if (sum_nr_running > leader_nr_running ||
 | 
				
			||||||
 | 
					 			    (sum_nr_running == leader_nr_running &&
 | 
				
			||||||
 | 
					 			     first_cpu(group->cpumask) >
 | 
				
			||||||
 | 
					 			      first_cpu(group_leader->cpumask))) {
 | 
				
			||||||
 | 
					 				group_leader = group;
 | 
				
			||||||
 | 
					 				leader_nr_running = sum_nr_running;
 | 
				
			||||||
 | 
					 			}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					group_next:
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
		group = group->next;
 | 
							group = group->next;
 | 
				
			||||||
	} while (group != sd->groups);
 | 
						} while (group != sd->groups);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2247,7 +2316,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 | 
				
			||||||
	return busiest;
 | 
						return busiest;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
out_balanced:
 | 
					out_balanced:
 | 
				
			||||||
 | 
					#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 | 
				
			||||||
 | 
						if (idle == NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
 | 
				
			||||||
 | 
							goto ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (this == group_leader && group_leader != group_min) {
 | 
				
			||||||
 | 
							*imbalance = min_load_per_task;
 | 
				
			||||||
 | 
							return group_min;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					ret:
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
	*imbalance = 0;
 | 
						*imbalance = 0;
 | 
				
			||||||
	return NULL;
 | 
						return NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -2300,7 +2378,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
 | 
				
			||||||
	int active_balance = 0;
 | 
						int active_balance = 0;
 | 
				
			||||||
	int sd_idle = 0;
 | 
						int sd_idle = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER)
 | 
						if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
 | 
				
			||||||
 | 
						    !sched_smt_power_savings)
 | 
				
			||||||
		sd_idle = 1;
 | 
							sd_idle = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	schedstat_inc(sd, lb_cnt[idle]);
 | 
						schedstat_inc(sd, lb_cnt[idle]);
 | 
				
			||||||
| 
						 | 
					@ -2389,7 +2468,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
 | 
				
			||||||
			sd->balance_interval *= 2;
 | 
								sd->balance_interval *= 2;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER)
 | 
						if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
 | 
				
			||||||
 | 
						    !sched_smt_power_savings)
 | 
				
			||||||
		return -1;
 | 
							return -1;
 | 
				
			||||||
	return nr_moved;
 | 
						return nr_moved;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2404,7 +2484,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
 | 
				
			||||||
			(sd->balance_interval < sd->max_interval))
 | 
								(sd->balance_interval < sd->max_interval))
 | 
				
			||||||
		sd->balance_interval *= 2;
 | 
							sd->balance_interval *= 2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
 | 
						if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
 | 
				
			||||||
		return -1;
 | 
							return -1;
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -2425,7 +2505,7 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
 | 
				
			||||||
	int nr_moved = 0;
 | 
						int nr_moved = 0;
 | 
				
			||||||
	int sd_idle = 0;
 | 
						int sd_idle = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (sd->flags & SD_SHARE_CPUPOWER)
 | 
						if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
 | 
				
			||||||
		sd_idle = 1;
 | 
							sd_idle = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
 | 
						schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
 | 
				
			||||||
| 
						 | 
					@ -2466,7 +2546,7 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
out_balanced:
 | 
					out_balanced:
 | 
				
			||||||
	schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
 | 
						schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
 | 
				
			||||||
	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
 | 
						if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
 | 
				
			||||||
		return -1;
 | 
							return -1;
 | 
				
			||||||
	sd->nr_balance_failed = 0;
 | 
						sd->nr_balance_failed = 0;
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
| 
						 | 
					@ -5732,6 +5812,7 @@ static cpumask_t sched_domain_node_span(int node)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
 | 
					 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
 | 
				
			||||||
 * can switch it on easily if needed.
 | 
					 * can switch it on easily if needed.
 | 
				
			||||||
| 
						 | 
					@ -6113,22 +6194,54 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Calculate CPU power for physical packages and nodes */
 | 
						/* Calculate CPU power for physical packages and nodes */
 | 
				
			||||||
 | 
					#ifdef CONFIG_SCHED_SMT
 | 
				
			||||||
 | 
						for_each_cpu_mask(i, *cpu_map) {
 | 
				
			||||||
 | 
							struct sched_domain *sd;
 | 
				
			||||||
 | 
							sd = &per_cpu(cpu_domains, i);
 | 
				
			||||||
 | 
							sd->groups->cpu_power = SCHED_LOAD_SCALE;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef CONFIG_SCHED_MC
 | 
				
			||||||
	for_each_cpu_mask(i, *cpu_map) {
 | 
						for_each_cpu_mask(i, *cpu_map) {
 | 
				
			||||||
		int power;
 | 
							int power;
 | 
				
			||||||
		struct sched_domain *sd;
 | 
							struct sched_domain *sd;
 | 
				
			||||||
#ifdef CONFIG_SCHED_SMT
 | 
					 | 
				
			||||||
		sd = &per_cpu(cpu_domains, i);
 | 
					 | 
				
			||||||
		power = SCHED_LOAD_SCALE;
 | 
					 | 
				
			||||||
		sd->groups->cpu_power = power;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#ifdef CONFIG_SCHED_MC
 | 
					 | 
				
			||||||
		sd = &per_cpu(core_domains, i);
 | 
							sd = &per_cpu(core_domains, i);
 | 
				
			||||||
 | 
							if (sched_smt_power_savings)
 | 
				
			||||||
 | 
								power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask);
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
			power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
 | 
								power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
 | 
				
			||||||
					    * SCHED_LOAD_SCALE / 10;
 | 
										    * SCHED_LOAD_SCALE / 10;
 | 
				
			||||||
		sd->groups->cpu_power = power;
 | 
							sd->groups->cpu_power = power;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for_each_cpu_mask(i, *cpu_map) {
 | 
				
			||||||
 | 
							struct sched_domain *sd;
 | 
				
			||||||
 | 
					#ifdef CONFIG_SCHED_MC
 | 
				
			||||||
		sd = &per_cpu(phys_domains, i);
 | 
							sd = &per_cpu(phys_domains, i);
 | 
				
			||||||
 | 
							if (i != first_cpu(sd->groups->cpumask))
 | 
				
			||||||
 | 
								continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							sd->groups->cpu_power = 0;
 | 
				
			||||||
 | 
							if (sched_mc_power_savings || sched_smt_power_savings) {
 | 
				
			||||||
 | 
								int j;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 			for_each_cpu_mask(j, sd->groups->cpumask) {
 | 
				
			||||||
 | 
									struct sched_domain *sd1;
 | 
				
			||||||
 | 
					 				sd1 = &per_cpu(core_domains, j);
 | 
				
			||||||
 | 
					 				/*
 | 
				
			||||||
 | 
					 			 	 * for each core we will add once
 | 
				
			||||||
 | 
					 				 * to the group in physical domain
 | 
				
			||||||
 | 
					 			 	 */
 | 
				
			||||||
 | 
					  	 			if (j != first_cpu(sd1->groups->cpumask))
 | 
				
			||||||
 | 
					 					continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 				if (sched_smt_power_savings)
 | 
				
			||||||
 | 
					   					sd->groups->cpu_power += sd1->groups->cpu_power;
 | 
				
			||||||
 | 
					 				else
 | 
				
			||||||
 | 
					   					sd->groups->cpu_power += SCHED_LOAD_SCALE;
 | 
				
			||||||
 | 
					   			}
 | 
				
			||||||
 | 
					 		} else
 | 
				
			||||||
 			/*
 | 
					 			/*
 | 
				
			||||||
 			 * This has to be < 2 * SCHED_LOAD_SCALE
 | 
					 			 * This has to be < 2 * SCHED_LOAD_SCALE
 | 
				
			||||||
 			 * Lets keep it SCHED_LOAD_SCALE, so that
 | 
					 			 * Lets keep it SCHED_LOAD_SCALE, so that
 | 
				
			||||||
| 
						 | 
					@ -6141,9 +6254,12 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 | 
				
			||||||
 			 */
 | 
					 			 */
 | 
				
			||||||
 			sd->groups->cpu_power = SCHED_LOAD_SCALE;
 | 
					 			sd->groups->cpu_power = SCHED_LOAD_SCALE;
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
 | 
							int power;
 | 
				
			||||||
		sd = &per_cpu(phys_domains, i);
 | 
							sd = &per_cpu(phys_domains, i);
 | 
				
			||||||
		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
 | 
							if (sched_smt_power_savings)
 | 
				
			||||||
				(cpus_weight(sd->groups->cpumask)-1) / 10;
 | 
								power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask);
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
 | 
								power = SCHED_LOAD_SCALE;
 | 
				
			||||||
		sd->groups->cpu_power = power;
 | 
							sd->groups->cpu_power = power;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -6244,6 +6360,80 @@ int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2)
 | 
				
			||||||
	return err;
 | 
						return err;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 | 
				
			||||||
 | 
					int arch_reinit_sched_domains(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						lock_cpu_hotplug();
 | 
				
			||||||
 | 
						detach_destroy_domains(&cpu_online_map);
 | 
				
			||||||
 | 
						err = arch_init_sched_domains(&cpu_online_map);
 | 
				
			||||||
 | 
						unlock_cpu_hotplug();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return err;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (buf[0] != '0' && buf[0] != '1')
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (smt)
 | 
				
			||||||
 | 
							sched_smt_power_savings = (buf[0] == '1');
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							sched_mc_power_savings = (buf[0] == '1');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ret = arch_reinit_sched_domains();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return ret ? ret : count;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int err = 0;
 | 
				
			||||||
 | 
					#ifdef CONFIG_SCHED_SMT
 | 
				
			||||||
 | 
						if (smt_capable())
 | 
				
			||||||
 | 
							err = sysfs_create_file(&cls->kset.kobj,
 | 
				
			||||||
 | 
										&attr_sched_smt_power_savings.attr);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef CONFIG_SCHED_MC
 | 
				
			||||||
 | 
						if (!err && mc_capable())
 | 
				
			||||||
 | 
							err = sysfs_create_file(&cls->kset.kobj,
 | 
				
			||||||
 | 
										&attr_sched_mc_power_savings.attr);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
						return err;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_SCHED_MC
 | 
				
			||||||
 | 
					static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return sprintf(page, "%u\n", sched_mc_power_savings);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					static ssize_t sched_mc_power_savings_store(struct sys_device *dev, const char *buf, size_t count)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return sched_power_savings_store(buf, count, 0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show,
 | 
				
			||||||
 | 
						    sched_mc_power_savings_store);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_SCHED_SMT
 | 
				
			||||||
 | 
					static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return sprintf(page, "%u\n", sched_smt_power_savings);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					static ssize_t sched_smt_power_savings_store(struct sys_device *dev, const char *buf, size_t count)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return sched_power_savings_store(buf, count, 1);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show,
 | 
				
			||||||
 | 
						    sched_smt_power_savings_store);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_HOTPLUG_CPU
 | 
					#ifdef CONFIG_HOTPLUG_CPU
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Force a reinitialization of the sched domains hierarchy.  The domains
 | 
					 * Force a reinitialization of the sched domains hierarchy.  The domains
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue