mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	sched: Fix race between ttwu() and task_rq_lock()
Thomas found that due to ttwu() changing a task's cpu without holding
the rq->lock, task_rq_lock() might end up locking the wrong rq.

Avoid this by serializing against TASK_WAKING.

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1266241712.15770.420.camel@laptop>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
This commit is contained in:
		
							parent
							
								
									9000f05c6d
								
							
						
					
					
						commit
						0970d2992d
					
				
					 1 changed file with 46 additions and 27 deletions
				
			
		| 
						 | 
					@ -940,6 +940,19 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
 | 
					#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Check whether the task is waking, we use this to synchronize against
 | 
				
			||||||
 | 
					 * ttwu() so that task_cpu() reports a stable number.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * We need to make an exception for PF_STARTING tasks because the fork
 | 
				
			||||||
 | 
					 * path might require task_rq_lock() to work, eg. it can call
 | 
				
			||||||
 | 
					 * set_cpus_allowed_ptr() from the cpuset clone_ns code.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static inline int task_is_waking(struct task_struct *p)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * __task_rq_lock - lock the runqueue a given task resides on.
 | 
					 * __task_rq_lock - lock the runqueue a given task resides on.
 | 
				
			||||||
 * Must be called interrupts disabled.
 | 
					 * Must be called interrupts disabled.
 | 
				
			||||||
| 
						 | 
					@ -947,10 +960,14 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 | 
				
			||||||
static inline struct rq *__task_rq_lock(struct task_struct *p)
 | 
					static inline struct rq *__task_rq_lock(struct task_struct *p)
 | 
				
			||||||
	__acquires(rq->lock)
 | 
						__acquires(rq->lock)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						struct rq *rq;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for (;;) {
 | 
						for (;;) {
 | 
				
			||||||
		struct rq *rq = task_rq(p);
 | 
							while (task_is_waking(p))
 | 
				
			||||||
 | 
								cpu_relax();
 | 
				
			||||||
 | 
							rq = task_rq(p);
 | 
				
			||||||
		raw_spin_lock(&rq->lock);
 | 
							raw_spin_lock(&rq->lock);
 | 
				
			||||||
		if (likely(rq == task_rq(p)))
 | 
							if (likely(rq == task_rq(p) && !task_is_waking(p)))
 | 
				
			||||||
			return rq;
 | 
								return rq;
 | 
				
			||||||
		raw_spin_unlock(&rq->lock);
 | 
							raw_spin_unlock(&rq->lock);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -967,10 +984,12 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 | 
				
			||||||
	struct rq *rq;
 | 
						struct rq *rq;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for (;;) {
 | 
						for (;;) {
 | 
				
			||||||
 | 
							while (task_is_waking(p))
 | 
				
			||||||
 | 
								cpu_relax();
 | 
				
			||||||
		local_irq_save(*flags);
 | 
							local_irq_save(*flags);
 | 
				
			||||||
		rq = task_rq(p);
 | 
							rq = task_rq(p);
 | 
				
			||||||
		raw_spin_lock(&rq->lock);
 | 
							raw_spin_lock(&rq->lock);
 | 
				
			||||||
		if (likely(rq == task_rq(p)))
 | 
							if (likely(rq == task_rq(p) && !task_is_waking(p)))
 | 
				
			||||||
			return rq;
 | 
								return rq;
 | 
				
			||||||
		raw_spin_unlock_irqrestore(&rq->lock, *flags);
 | 
							raw_spin_unlock_irqrestore(&rq->lock, *flags);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -2408,14 +2427,27 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 | 
				
			||||||
	__task_rq_unlock(rq);
 | 
						__task_rq_unlock(rq);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
 | 
						cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
 | 
				
			||||||
	if (cpu != orig_cpu)
 | 
						if (cpu != orig_cpu) {
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * Since we migrate the task without holding any rq->lock,
 | 
				
			||||||
 | 
							 * we need to be careful with task_rq_lock(), since that
 | 
				
			||||||
 | 
							 * might end up locking an invalid rq.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
		set_task_cpu(p, cpu);
 | 
							set_task_cpu(p, cpu);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	rq = __task_rq_lock(p);
 | 
						rq = cpu_rq(cpu);
 | 
				
			||||||
 | 
						raw_spin_lock(&rq->lock);
 | 
				
			||||||
	update_rq_clock(rq);
 | 
						update_rq_clock(rq);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * We migrated the task without holding either rq->lock, however
 | 
				
			||||||
 | 
						 * since the task is not on the task list itself, nobody else
 | 
				
			||||||
 | 
						 * will try and migrate the task, hence the rq should match the
 | 
				
			||||||
 | 
						 * cpu we just moved it to.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						WARN_ON(task_cpu(p) != cpu);
 | 
				
			||||||
	WARN_ON(p->state != TASK_WAKING);
 | 
						WARN_ON(p->state != TASK_WAKING);
 | 
				
			||||||
	cpu = task_cpu(p);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_SCHEDSTATS
 | 
					#ifdef CONFIG_SCHEDSTATS
 | 
				
			||||||
	schedstat_inc(rq, ttwu_count);
 | 
						schedstat_inc(rq, ttwu_count);
 | 
				
			||||||
| 
						 | 
					@ -2647,7 +2679,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long flags;
 | 
						unsigned long flags;
 | 
				
			||||||
	struct rq *rq;
 | 
						struct rq *rq;
 | 
				
			||||||
	int cpu __maybe_unused = get_cpu();
 | 
						int cpu = get_cpu();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_SMP
 | 
					#ifdef CONFIG_SMP
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
| 
						 | 
					@ -2663,7 +2695,13 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 | 
				
			||||||
	set_task_cpu(p, cpu);
 | 
						set_task_cpu(p, cpu);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	rq = task_rq_lock(p, &flags);
 | 
						/*
 | 
				
			||||||
 | 
						 * Since the task is not on the rq and we still have TASK_WAKING set
 | 
				
			||||||
 | 
						 * nobody else will migrate this task.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						rq = cpu_rq(cpu);
 | 
				
			||||||
 | 
						raw_spin_lock_irqsave(&rq->lock, flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	BUG_ON(p->state != TASK_WAKING);
 | 
						BUG_ON(p->state != TASK_WAKING);
 | 
				
			||||||
	p->state = TASK_RUNNING;
 | 
						p->state = TASK_RUNNING;
 | 
				
			||||||
	update_rq_clock(rq);
 | 
						update_rq_clock(rq);
 | 
				
			||||||
| 
						 | 
					@ -7156,27 +7194,8 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 | 
				
			||||||
	struct rq *rq;
 | 
						struct rq *rq;
 | 
				
			||||||
	int ret = 0;
 | 
						int ret = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * Since we rely on wake-ups to migrate sleeping tasks, don't change
 | 
					 | 
				
			||||||
	 * the ->cpus_allowed mask from under waking tasks, which would be
 | 
					 | 
				
			||||||
	 * possible when we change rq->lock in ttwu(), so synchronize against
 | 
					 | 
				
			||||||
	 * TASK_WAKING to avoid that.
 | 
					 | 
				
			||||||
	 *
 | 
					 | 
				
			||||||
	 * Make an exception for freshly cloned tasks, since cpuset namespaces
 | 
					 | 
				
			||||||
	 * might move the task about, we have to validate the target in
 | 
					 | 
				
			||||||
	 * wake_up_new_task() anyway since the cpu might have gone away.
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
again:
 | 
					 | 
				
			||||||
	while (p->state == TASK_WAKING && !(p->flags & PF_STARTING))
 | 
					 | 
				
			||||||
		cpu_relax();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	rq = task_rq_lock(p, &flags);
 | 
						rq = task_rq_lock(p, &flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) {
 | 
					 | 
				
			||||||
		task_rq_unlock(rq, &flags);
 | 
					 | 
				
			||||||
		goto again;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
 | 
						if (!cpumask_intersects(new_mask, cpu_active_mask)) {
 | 
				
			||||||
		ret = -EINVAL;
 | 
							ret = -EINVAL;
 | 
				
			||||||
		goto out;
 | 
							goto out;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue