mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 08:38:45 +02:00 
			
		
		
		
	cpuset: handle race between CPU hotplug and cpuset_hotplug_work
A discrepancy between cpu_online_mask and cpuset's effective_cpus
mask is inevitable during hotplug since cpuset defers updating of
effective_cpus mask using a workqueue, during which time nothing
prevents the system from more hotplug operations.  For that reason
guarantee_online_cpus() walks up the cpuset hierarchy until it finds
an intersection under the assumption that top cpuset's effective_cpus
mask intersects with cpu_online_mask even with such a race occurring.
However a sequence of CPU hotplugs can open a time window, during which
none of the effective CPUs in the top cpuset intersect with
cpu_online_mask.
For example when there are 4 possible CPUs 0-3 and only CPU0 is online:
  ========================  ===========================
   cpu_online_mask           top_cpuset.effective_cpus
  ========================  ===========================
   echo 1 > cpu2/online.
   CPU hotplug notifier woke up the hotplug work, but it has not yet been scheduled.
      [0,2]                     [0]
   echo 0 > cpu0/online.
   The workqueue is still runnable.
      [2]                       [0]
  ========================  ===========================
  Now there is no intersection between cpu_online_mask and
  top_cpuset.effective_cpus.  Thus invoking sys_sched_setaffinity() at
  this moment can cause the following:
   Unable to handle kernel NULL pointer dereference at virtual address 000000d0
   ------------[ cut here ]------------
   Kernel BUG at ffffffc0001389b0 [verbose debug info unavailable]
   Internal error: Oops - BUG: 96000005 [#1] PREEMPT SMP
   Modules linked in:
   CPU: 2 PID: 1420 Comm: taskset Tainted: G        W       4.4.8+ #98
   task: ffffffc06a5c4880 ti: ffffffc06e124000 task.ti: ffffffc06e124000
   PC is at guarantee_online_cpus+0x2c/0x58
   LR is at cpuset_cpus_allowed+0x4c/0x6c
   <snip>
   Process taskset (pid: 1420, stack limit = 0xffffffc06e124020)
   Call trace:
   [<ffffffc0001389b0>] guarantee_online_cpus+0x2c/0x58
   [<ffffffc00013b208>] cpuset_cpus_allowed+0x4c/0x6c
   [<ffffffc0000d61f0>] sched_setaffinity+0xc0/0x1ac
   [<ffffffc0000d6374>] SyS_sched_setaffinity+0x98/0xac
   [<ffffffc000085cb0>] el0_svc_naked+0x24/0x28
The top cpuset's effective_cpus are guaranteed to be identical to
cpu_online_mask eventually.  Hence fall back to cpu_online_mask when
there is no intersection between top cpuset's effective_cpus and
cpu_online_mask.
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: cgroups@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: <stable@vger.kernel.org> # 3.17+
Signed-off-by: Tejun Heo <tj@kernel.org>
			
			
This commit is contained in:
		
							parent
							
								
									568ac88821
								
							
						
					
					
						commit
						28b89b9e6f
					
				
					 1 changed files with 14 additions and 3 deletions
				
			
		|  | @ -325,8 +325,7 @@ static struct file_system_type cpuset_fs_type = { | |||
| /*
 | ||||
|  * Return in pmask the portion of a cpuset's cpus_allowed that | ||||
|  * are online.  If none are online, walk up the cpuset hierarchy | ||||
|  * until we find one that does have some online cpus.  The top | ||||
|  * cpuset always has some cpus online. | ||||
|  * until we find one that does have some online cpus. | ||||
|  * | ||||
|  * One way or another, we guarantee to return some non-empty subset | ||||
|  * of cpu_online_mask. | ||||
|  | @ -335,8 +334,20 @@ static struct file_system_type cpuset_fs_type = { | |||
|  */ | ||||
| static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) | ||||
| { | ||||
| 	while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) | ||||
| 	while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) { | ||||
| 		cs = parent_cs(cs); | ||||
| 		if (unlikely(!cs)) { | ||||
| 			/*
 | ||||
| 			 * The top cpuset doesn't have any online cpu as a | ||||
| 			 * consequence of a race between cpuset_hotplug_work | ||||
| 			 * and cpu hotplug notifier.  But we know the top | ||||
| 			 * cpuset's effective_cpus is on its way to be | ||||
| 			 * identical to cpu_online_mask. | ||||
| 			 */ | ||||
| 			cpumask_copy(pmask, cpu_online_mask); | ||||
| 			return; | ||||
| 		} | ||||
| 	} | ||||
| 	cpumask_and(pmask, cs->effective_cpus, cpu_online_mask); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Joonwoo Park
						Joonwoo Park