mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 08:38:45 +02:00 
			
		
		
		
	sched: Remove get_online_cpus() usage
Remove get_online_cpus() usage from the scheduler; there are 4 sites that
use it:
 - sched_init_smp(); where it's completely superfluous since we're in
   'early' boot and there simply cannot be any hotplugging.
 - sched_getaffinity(); we already take a raw spinlock to protect the
   task cpus_allowed mask, this disables preemption and therefore
   also stabilizes cpu_online_mask as that's modified using
   stop_machine. However switch to active mask for symmetry with
   sched_setaffinity()/set_cpus_allowed_ptr(). We guarantee active
   mask stability by inserting sync_rcu/sched() into _cpu_down.
 - sched_setaffinity(); we don't appear to need get_online_cpus()
   either, there are two sites where hotplug appears relevant:
    * cpuset_cpus_allowed(); for the !cpuset case we use possible_mask,
      for the cpuset case we hold task_lock, which is a spinlock and
      thus for mainline disables preemption (might cause pain on RT).
    * set_cpus_allowed_ptr(); Holds all scheduler locks and thus has
      preemption properly disabled; also it already deals with hotplug
      races explicitly where it releases them.
 - migrate_swap(); we can make stop_two_cpus() do the heavy lifting for
   us with a little trickery. By adding a sync_sched/rcu() after the
   CPU_DOWN_PREPARE notifier we can provide preempt/rcu guarantees for
   cpu_active_mask. Use these to validate that both our cpus are active
   when queueing the stop work before we queue the stop_machine works
   for take_cpu_down().
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Link: http://lkml.kernel.org/r/20131011123820.GV3081@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
			
			
This commit is contained in:
		
							parent
							
								
									746023159c
								
							
						
					
					
						commit
						6acce3ef84
					
				
					 3 changed files with 48 additions and 15 deletions
				
			
		
							
								
								
									
										17
									
								
								kernel/cpu.c
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								kernel/cpu.c
									
									
									
									
									
								
							|  | @ -308,6 +308,23 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) | ||||||
| 	} | 	} | ||||||
| 	smpboot_park_threads(cpu); | 	smpboot_park_threads(cpu); | ||||||
| 
 | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * By now we've cleared cpu_active_mask, wait for all preempt-disabled | ||||||
|  | 	 * and RCU users of this state to go away such that all new such users | ||||||
|  | 	 * will observe it. | ||||||
|  | 	 * | ||||||
|  | 	 * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might | ||||||
|  | 	 * not imply sync_sched(), so explicitly call both. | ||||||
|  | 	 */ | ||||||
|  | #ifdef CONFIG_PREEMPT | ||||||
|  | 	synchronize_sched(); | ||||||
|  | #endif | ||||||
|  | 	synchronize_rcu(); | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * So now all preempt/rcu users must observe !cpu_active(). | ||||||
|  | 	 */ | ||||||
|  | 
 | ||||||
| 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); | 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); | ||||||
| 	if (err) { | 	if (err) { | ||||||
| 		/* CPU didn't die: tell everyone.  Can't complain. */ | 		/* CPU didn't die: tell everyone.  Can't complain. */ | ||||||
|  |  | ||||||
|  | @ -1085,8 +1085,6 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) | ||||||
| 	struct migration_swap_arg arg; | 	struct migration_swap_arg arg; | ||||||
| 	int ret = -EINVAL; | 	int ret = -EINVAL; | ||||||
| 
 | 
 | ||||||
| 	get_online_cpus(); |  | ||||||
| 
 |  | ||||||
| 	arg = (struct migration_swap_arg){ | 	arg = (struct migration_swap_arg){ | ||||||
| 		.src_task = cur, | 		.src_task = cur, | ||||||
| 		.src_cpu = task_cpu(cur), | 		.src_cpu = task_cpu(cur), | ||||||
|  | @ -1097,6 +1095,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) | ||||||
| 	if (arg.src_cpu == arg.dst_cpu) | 	if (arg.src_cpu == arg.dst_cpu) | ||||||
| 		goto out; | 		goto out; | ||||||
| 
 | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * These three tests are all lockless; this is OK since all of them | ||||||
|  | 	 * will be re-checked with proper locks held further down the line. | ||||||
|  | 	 */ | ||||||
| 	if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) | 	if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) | ||||||
| 		goto out; | 		goto out; | ||||||
| 
 | 
 | ||||||
|  | @ -1109,7 +1111,6 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) | ||||||
| 	ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg); | 	ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg); | ||||||
| 
 | 
 | ||||||
| out: | out: | ||||||
| 	put_online_cpus(); |  | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -3710,7 +3711,6 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | ||||||
| 	struct task_struct *p; | 	struct task_struct *p; | ||||||
| 	int retval; | 	int retval; | ||||||
| 
 | 
 | ||||||
| 	get_online_cpus(); |  | ||||||
| 	rcu_read_lock(); | 	rcu_read_lock(); | ||||||
| 
 | 
 | ||||||
| 	p = find_process_by_pid(pid); | 	p = find_process_by_pid(pid); | ||||||
|  | @ -3773,7 +3773,6 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | ||||||
| 	free_cpumask_var(cpus_allowed); | 	free_cpumask_var(cpus_allowed); | ||||||
| out_put_task: | out_put_task: | ||||||
| 	put_task_struct(p); | 	put_task_struct(p); | ||||||
| 	put_online_cpus(); |  | ||||||
| 	return retval; | 	return retval; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -3818,7 +3817,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | ||||||
| 	unsigned long flags; | 	unsigned long flags; | ||||||
| 	int retval; | 	int retval; | ||||||
| 
 | 
 | ||||||
| 	get_online_cpus(); |  | ||||||
| 	rcu_read_lock(); | 	rcu_read_lock(); | ||||||
| 
 | 
 | ||||||
| 	retval = -ESRCH; | 	retval = -ESRCH; | ||||||
|  | @ -3831,12 +3829,11 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | ||||||
| 		goto out_unlock; | 		goto out_unlock; | ||||||
| 
 | 
 | ||||||
| 	raw_spin_lock_irqsave(&p->pi_lock, flags); | 	raw_spin_lock_irqsave(&p->pi_lock, flags); | ||||||
| 	cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); | 	cpumask_and(mask, &p->cpus_allowed, cpu_active_mask); | ||||||
| 	raw_spin_unlock_irqrestore(&p->pi_lock, flags); | 	raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||||||
| 
 | 
 | ||||||
| out_unlock: | out_unlock: | ||||||
| 	rcu_read_unlock(); | 	rcu_read_unlock(); | ||||||
| 	put_online_cpus(); |  | ||||||
| 
 | 
 | ||||||
| 	return retval; | 	return retval; | ||||||
| } | } | ||||||
|  | @ -6494,14 +6491,17 @@ void __init sched_init_smp(void) | ||||||
| 
 | 
 | ||||||
| 	sched_init_numa(); | 	sched_init_numa(); | ||||||
| 
 | 
 | ||||||
| 	get_online_cpus(); | 	/*
 | ||||||
|  | 	 * There's no userspace yet to cause hotplug operations; hence all the | ||||||
|  | 	 * cpu masks are stable and all blatant races in the below code cannot | ||||||
|  | 	 * happen. | ||||||
|  | 	 */ | ||||||
| 	mutex_lock(&sched_domains_mutex); | 	mutex_lock(&sched_domains_mutex); | ||||||
| 	init_sched_domains(cpu_active_mask); | 	init_sched_domains(cpu_active_mask); | ||||||
| 	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); | 	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); | ||||||
| 	if (cpumask_empty(non_isolated_cpus)) | 	if (cpumask_empty(non_isolated_cpus)) | ||||||
| 		cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); | 		cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); | ||||||
| 	mutex_unlock(&sched_domains_mutex); | 	mutex_unlock(&sched_domains_mutex); | ||||||
| 	put_online_cpus(); |  | ||||||
| 
 | 
 | ||||||
| 	hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE); | 	hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE); | ||||||
| 	hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE); | 	hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE); | ||||||
|  |  | ||||||
|  | @ -234,11 +234,13 @@ static void irq_cpu_stop_queue_work(void *arg) | ||||||
|  */ |  */ | ||||||
| int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg) | int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg) | ||||||
| { | { | ||||||
| 	int call_cpu; |  | ||||||
| 	struct cpu_stop_done done; | 	struct cpu_stop_done done; | ||||||
| 	struct cpu_stop_work work1, work2; | 	struct cpu_stop_work work1, work2; | ||||||
| 	struct irq_cpu_stop_queue_work_info call_args; | 	struct irq_cpu_stop_queue_work_info call_args; | ||||||
| 	struct multi_stop_data msdata = { | 	struct multi_stop_data msdata; | ||||||
|  | 
 | ||||||
|  | 	preempt_disable(); | ||||||
|  | 	msdata = (struct multi_stop_data){ | ||||||
| 		.fn = fn, | 		.fn = fn, | ||||||
| 		.data = arg, | 		.data = arg, | ||||||
| 		.num_threads = 2, | 		.num_threads = 2, | ||||||
|  | @ -261,17 +263,31 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * | ||||||
| 	cpu_stop_init_done(&done, 2); | 	cpu_stop_init_done(&done, 2); | ||||||
| 	set_state(&msdata, MULTI_STOP_PREPARE); | 	set_state(&msdata, MULTI_STOP_PREPARE); | ||||||
| 
 | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * If we observe both CPUs active we know _cpu_down() cannot yet have | ||||||
|  | 	 * queued its stop_machine works and therefore ours will get executed | ||||||
|  | 	 * first. Or its not either one of our CPUs that's getting unplugged, | ||||||
|  | 	 * in which case we don't care. | ||||||
|  | 	 * | ||||||
|  | 	 * This relies on the stopper workqueues to be FIFO. | ||||||
|  | 	 */ | ||||||
|  | 	if (!cpu_active(cpu1) || !cpu_active(cpu2)) { | ||||||
|  | 		preempt_enable(); | ||||||
|  | 		return -ENOENT; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * Queuing needs to be done by the lowest numbered CPU, to ensure | 	 * Queuing needs to be done by the lowest numbered CPU, to ensure | ||||||
| 	 * that works are always queued in the same order on every CPU. | 	 * that works are always queued in the same order on every CPU. | ||||||
| 	 * This prevents deadlocks. | 	 * This prevents deadlocks. | ||||||
| 	 */ | 	 */ | ||||||
| 	call_cpu = min(cpu1, cpu2); | 	smp_call_function_single(min(cpu1, cpu2), | ||||||
| 
 | 				 &irq_cpu_stop_queue_work, | ||||||
| 	smp_call_function_single(call_cpu, &irq_cpu_stop_queue_work, |  | ||||||
| 				 &call_args, 0); | 				 &call_args, 0); | ||||||
|  | 	preempt_enable(); | ||||||
| 
 | 
 | ||||||
| 	wait_for_completion(&done.completion); | 	wait_for_completion(&done.completion); | ||||||
|  | 
 | ||||||
| 	return done.executed ? done.ret : -ENOENT; | 	return done.executed ? done.ret : -ENOENT; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Peter Zijlstra
						Peter Zijlstra