	kthread: Default affine kthread to its preferred NUMA node
Kthreads attached to a preferred NUMA node for their task structure
allocation can also be assumed to run preferably within that same node.

A more precise affinity is usually notified by calling
kthread_create_on_cpu() or kthread_bind[_mask]() before the first wakeup.

For the others, a default affinity to the node is desired and sometimes
implemented with more or less success when it comes to dealing with
hotplug events and nohz_full / CPU Isolation interactions:

- kcompactd is affine to its node and handles hotplug but not CPU Isolation
- kswapd is affine to its node and ignores hotplug and CPU Isolation
- A bunch of drivers create their kthreads on a specific node and don't
  take care of affining them further.

Handle that default node affinity preference at the generic level
instead, provided a kthread is created on an actual node and doesn't
apply any specific affinity such as a given CPU or a custom cpumask to
bind to before its first wake-up.

This generic handling is aware of CPU hotplug events and CPU isolation
such that:

* When a housekeeping CPU goes up that is part of the node of a given
  kthread, the related task is re-affined to its own node if it was
  previously running on the default last resort online housekeeping set
  from other nodes.

* When a housekeeping CPU goes down while it was part of the node of a
  kthread, the running task is migrated (or the sleeping task is woken
  up) automatically by the scheduler to other housekeepers within the
  same node or, as a last resort, to all housekeepers from other nodes.

Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
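As a rough illustration of what this buys callers (this sketch is not part of the patch; mydrv_poll_fn() and mydrv_start_poller() are made-up names for a hypothetical driver), a kthread created with kthread_create_on_node() now inherits a default affinity to that node's housekeeping CPUs without the driver calling set_cpus_allowed_ptr() itself:

#include <linux/delay.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/sched.h>

/* Hypothetical worker: just sleeps until asked to stop. */
static int mydrv_poll_fn(void *data)
{
	while (!kthread_should_stop())
		msleep_interruptible(1000);
	return 0;
}

static struct task_struct *mydrv_start_poller(struct device *dev)
{
	int nid = dev_to_node(dev);
	struct task_struct *t;

	/*
	 * The task_struct is allocated on 'nid'. With this commit the
	 * thread is also affined by default to the housekeeping CPUs of
	 * that node before its first wakeup, as long as no explicit CPU
	 * or cpumask binding is applied first.
	 */
	t = kthread_create_on_node(mydrv_poll_fn, dev, nid, "mydrv_poll/%d", nid);
	if (!IS_ERR(t))
		wake_up_process(t);
	return t;
}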
parent 5eacb68a35
commit d1a8919758

2 changed files with 106 additions and 1 deletion
include/linux/cpuhotplug.h

@@ -240,6 +240,7 @@ enum cpuhp_state {
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RANDOM_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
+	CPUHP_AP_KTHREADS_ONLINE,
 	CPUHP_AP_BASE_CACHEINFO_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
 	CPUHP_AP_ONLINE_DYN_END		= CPUHP_AP_ONLINE_DYN + 40,

kernel/kthread.c
@@ -35,6 +35,9 @@ static DEFINE_SPINLOCK(kthread_create_lock);
 static LIST_HEAD(kthread_create_list);
 struct task_struct *kthreadd_task;
 
+static LIST_HEAD(kthreads_hotplug);
+static DEFINE_MUTEX(kthreads_hotplug_lock);
+
 struct kthread_create_info
 {
 	/* Information passed to kthread() from kthreadd. */
@@ -53,6 +56,7 @@ struct kthread_create_info
 struct kthread {
 	unsigned long flags;
 	unsigned int cpu;
+	unsigned int node;
 	int started;
 	int result;
 	int (*threadfn)(void *);
@@ -64,6 +68,8 @@ struct kthread {
 #endif
 	/* To store the full name if task comm is truncated. */
 	char *full_name;
+	struct task_struct *task;
+	struct list_head hotplug_node;
 };
 
 enum KTHREAD_BITS {
@@ -122,8 +128,11 @@ bool set_kthread_struct(struct task_struct *p)
 
 	init_completion(&kthread->exited);
 	init_completion(&kthread->parked);
+	INIT_LIST_HEAD(&kthread->hotplug_node);
 	p->vfork_done = &kthread->exited;
 
+	kthread->task = p;
+	kthread->node = tsk_fork_get_node(current);
 	p->worker_private = kthread;
 	return true;
 }
@@ -314,6 +323,11 @@ void __noreturn kthread_exit(long result)
 {
 	struct kthread *kthread = to_kthread(current);
 	kthread->result = result;
+	if (!list_empty(&kthread->hotplug_node)) {
+		mutex_lock(&kthreads_hotplug_lock);
+		list_del(&kthread->hotplug_node);
+		mutex_unlock(&kthreads_hotplug_lock);
+	}
 	do_exit(0);
 }
 EXPORT_SYMBOL(kthread_exit);
@@ -339,6 +353,48 @@ void __noreturn kthread_complete_and_exit(struct completion *comp, long code)
 }
 EXPORT_SYMBOL(kthread_complete_and_exit);
 
+static void kthread_fetch_affinity(struct kthread *kthread, struct cpumask *cpumask)
+{
+	cpumask_and(cpumask, cpumask_of_node(kthread->node),
+		    housekeeping_cpumask(HK_TYPE_KTHREAD));
+
+	if (cpumask_empty(cpumask))
+		cpumask_copy(cpumask, housekeeping_cpumask(HK_TYPE_KTHREAD));
+}
+
+static void kthread_affine_node(void)
+{
+	struct kthread *kthread = to_kthread(current);
+	cpumask_var_t affinity;
+
+	WARN_ON_ONCE(kthread_is_per_cpu(current));
+
+	if (kthread->node == NUMA_NO_NODE) {
+		housekeeping_affine(current, HK_TYPE_KTHREAD);
+	} else {
+		if (!zalloc_cpumask_var(&affinity, GFP_KERNEL)) {
+			WARN_ON_ONCE(1);
+			return;
+		}
+
+		mutex_lock(&kthreads_hotplug_lock);
+		WARN_ON_ONCE(!list_empty(&kthread->hotplug_node));
+		list_add_tail(&kthread->hotplug_node, &kthreads_hotplug);
+		/*
+		 * The node cpumask is racy when read from kthread() but:
+		 * - a racing CPU going down will either fail on the subsequent
+		 *   call to set_cpus_allowed_ptr() or be migrated to housekeepers
+		 *   afterwards by the scheduler.
+		 * - a racing CPU going up will be handled by kthreads_online_cpu()
+		 */
+		kthread_fetch_affinity(kthread, affinity);
+		set_cpus_allowed_ptr(current, affinity);
+		mutex_unlock(&kthreads_hotplug_lock);
+
+		free_cpumask_var(affinity);
+	}
+}
+
 static int kthread(void *_create)
 {
 	static const struct sched_param param = { .sched_priority = 0 };
@@ -369,7 +425,6 @@ static int kthread(void *_create)
 	 * back to default in case they have been changed.
 	 */
 	sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
-	set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_KTHREAD));
 
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
@@ -385,6 +440,9 @@ static int kthread(void *_create)
 
 	self->started = 1;
 
+	if (!(current->flags & PF_NO_SETAFFINITY))
+		kthread_affine_node();
+
 	ret = -EINTR;
 	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
 		cgroup_kthread_ready();
@@ -781,6 +839,52 @@ int kthreadd(void *unused)
 	return 0;
 }
 
+/*
+ * Re-affine kthreads according to their preferences
+ * and the newly online CPU. The CPU down part is handled
+ * by select_fallback_rq() which default re-affines to
+ * housekeepers in case the preferred affinity doesn't
+ * apply anymore.
+ */
+static int kthreads_online_cpu(unsigned int cpu)
+{
+	cpumask_var_t affinity;
+	struct kthread *k;
+	int ret;
+
+	guard(mutex)(&kthreads_hotplug_lock);
+
+	if (list_empty(&kthreads_hotplug))
+		return 0;
+
+	if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
+		return -ENOMEM;
+
+	ret = 0;
+
+	list_for_each_entry(k, &kthreads_hotplug, hotplug_node) {
+		if (WARN_ON_ONCE((k->task->flags & PF_NO_SETAFFINITY) ||
+				 kthread_is_per_cpu(k->task) ||
+				 k->node == NUMA_NO_NODE)) {
+			ret = -EINVAL;
+			continue;
+		}
+		kthread_fetch_affinity(k, affinity);
+		set_cpus_allowed_ptr(k->task, affinity);
+	}
+
+	free_cpumask_var(affinity);
+
+	return ret;
+}
+
+static int kthreads_init(void)
+{
+	return cpuhp_setup_state(CPUHP_AP_KTHREADS_ONLINE, "kthreads:online",
+				kthreads_online_cpu, NULL);
+}
+early_initcall(kthreads_init);
+
 void __kthread_init_worker(struct kthread_worker *worker,
 				const char *name,
 				struct lock_class_key *key)