Mirror of https://github.com/torvalds/linux.git — synced 2025-10-31 08:38:45 +02:00
	sched_ext: idle: Introduce node-aware idle cpu kfunc helpers
Introduce a new kfunc to retrieve the node associated to a CPU:

 int scx_bpf_cpu_node(s32 cpu)

Add the following kfuncs to provide BPF schedulers direct access to
per-node idle cpumasks information:

 const struct cpumask *scx_bpf_get_idle_cpumask_node(int node)
 const struct cpumask *scx_bpf_get_idle_smtmask_node(int node)

 s32 scx_bpf_pick_idle_cpu_node(const cpumask_t *cpus_allowed,
				int node, u64 flags)
 s32 scx_bpf_pick_any_cpu_node(const cpumask_t *cpus_allowed,
			       int node, u64 flags)

Moreover, trigger an scx error when any of the non-node aware idle CPU
kfuncs are used when SCX_OPS_BUILTIN_IDLE_PER_NODE is enabled.

Cc: Yury Norov [NVIDIA] <yury.norov@gmail.com>
Signed-off-by: Andrea Righi <arighi@nvidia.com>
Reviewed-by: Yury Norov [NVIDIA] <yury.norov@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
parent 48849271e6
commit 01059219b0

3 changed files with 218 additions and 0 deletions
		|  | @ -728,6 +728,33 @@ void scx_idle_disable(void) | |||
| /********************************************************************************
 | ||||
|  * Helpers that can be called from the BPF scheduler. | ||||
|  */ | ||||
| 
 | ||||
| static int validate_node(int node) | ||||
| { | ||||
| 	if (!static_branch_likely(&scx_builtin_idle_per_node)) { | ||||
| 		scx_ops_error("per-node idle tracking is disabled"); | ||||
| 		return -EOPNOTSUPP; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Return no entry for NUMA_NO_NODE (not a critical scx error) */ | ||||
| 	if (node == NUMA_NO_NODE) | ||||
| 		return -ENOENT; | ||||
| 
 | ||||
| 	/* Make sure node is in a valid range */ | ||||
| 	if (node < 0 || node >= nr_node_ids) { | ||||
| 		scx_ops_error("invalid node %d", node); | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Make sure the node is part of the set of possible nodes */ | ||||
| 	if (!node_possible(node)) { | ||||
| 		scx_ops_error("unavailable node %d", node); | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
| 
 | ||||
| 	return node; | ||||
| } | ||||
| 
 | ||||
| __bpf_kfunc_start_defs(); | ||||
| 
 | ||||
| static bool check_builtin_idle_enabled(void) | ||||
|  | @ -739,6 +766,23 @@ static bool check_builtin_idle_enabled(void) | |||
| 	return false; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * scx_bpf_cpu_node - Return the NUMA node the given @cpu belongs to, or | ||||
|  *		      trigger an error if @cpu is invalid | ||||
|  * @cpu: target CPU | ||||
|  */ | ||||
| __bpf_kfunc int scx_bpf_cpu_node(s32 cpu) | ||||
| { | ||||
| #ifdef CONFIG_NUMA | ||||
| 	if (!ops_cpu_valid(cpu, NULL)) | ||||
| 		return NUMA_NO_NODE; | ||||
| 
 | ||||
| 	return cpu_to_node(cpu); | ||||
| #else | ||||
| 	return 0; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * scx_bpf_select_cpu_dfl - The default implementation of ops.select_cpu() | ||||
|  * @p: task_struct to select a CPU for | ||||
|  | @ -771,6 +815,28 @@ __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, | |||
| 	return prev_cpu; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * scx_bpf_get_idle_cpumask_node - Get a referenced kptr to the | ||||
|  * idle-tracking per-CPU cpumask of a target NUMA node. | ||||
|  * @node: target NUMA node | ||||
|  * | ||||
|  * Returns an empty cpumask if idle tracking is not enabled, if @node is | ||||
|  * not valid, or running on a UP kernel. In this case the actual error will | ||||
|  * be reported to the BPF scheduler via scx_ops_error(). | ||||
|  */ | ||||
| __bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask_node(int node) | ||||
| { | ||||
| 	node = validate_node(node); | ||||
| 	if (node < 0) | ||||
| 		return cpu_none_mask; | ||||
| 
 | ||||
| #ifdef CONFIG_SMP | ||||
| 	return idle_cpumask(node)->cpu; | ||||
| #else | ||||
| 	return cpu_none_mask; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * scx_bpf_get_idle_cpumask - Get a referenced kptr to the idle-tracking | ||||
|  * per-CPU cpumask. | ||||
|  | @ -795,6 +861,32 @@ __bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask(void) | |||
| #endif | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * scx_bpf_get_idle_smtmask_node - Get a referenced kptr to the | ||||
|  * idle-tracking, per-physical-core cpumask of a target NUMA node. Can be | ||||
|  * used to determine if an entire physical core is free. | ||||
|  * @node: target NUMA node | ||||
|  * | ||||
|  * Returns an empty cpumask if idle tracking is not enabled, if @node is | ||||
|  * not valid, or running on a UP kernel. In this case the actual error will | ||||
|  * be reported to the BPF scheduler via scx_ops_error(). | ||||
|  */ | ||||
| __bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask_node(int node) | ||||
| { | ||||
| 	node = validate_node(node); | ||||
| 	if (node < 0) | ||||
| 		return cpu_none_mask; | ||||
| 
 | ||||
| #ifdef CONFIG_SMP | ||||
| 	if (sched_smt_active()) | ||||
| 		return idle_cpumask(node)->smt; | ||||
| 	else | ||||
| 		return idle_cpumask(node)->cpu; | ||||
| #else | ||||
| 	return cpu_none_mask; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * scx_bpf_get_idle_smtmask - Get a referenced kptr to the idle-tracking, | ||||
|  * per-physical-core cpumask. Can be used to determine if an entire physical | ||||
|  | @ -859,6 +951,35 @@ __bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) | |||
| 		return false; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * scx_bpf_pick_idle_cpu_node - Pick and claim an idle cpu from @node | ||||
|  * @cpus_allowed: Allowed cpumask | ||||
|  * @node: target NUMA node | ||||
|  * @flags: %SCX_PICK_IDLE_* flags | ||||
|  * | ||||
|  * Pick and claim an idle cpu in @cpus_allowed from the NUMA node @node. | ||||
|  * | ||||
|  * Returns the picked idle cpu number on success, or -%EBUSY if no matching | ||||
|  * cpu was found. | ||||
|  * | ||||
|  * The search starts from @node and proceeds to other online NUMA nodes in | ||||
|  * order of increasing distance (unless SCX_PICK_IDLE_IN_NODE is specified, | ||||
|  * in which case the search is limited to the target @node). | ||||
|  * | ||||
|  * Always returns an error if ops.update_idle() is implemented and | ||||
|  * %SCX_OPS_KEEP_BUILTIN_IDLE is not set, or if | ||||
|  * %SCX_OPS_BUILTIN_IDLE_PER_NODE is not set. | ||||
|  */ | ||||
| __bpf_kfunc s32 scx_bpf_pick_idle_cpu_node(const struct cpumask *cpus_allowed, | ||||
| 					   int node, u64 flags) | ||||
| { | ||||
| 	node = validate_node(node); | ||||
| 	if (node < 0) | ||||
| 		return node; | ||||
| 
 | ||||
| 	return scx_pick_idle_cpu(cpus_allowed, node, flags); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * scx_bpf_pick_idle_cpu - Pick and claim an idle cpu | ||||
|  * @cpus_allowed: Allowed cpumask | ||||
|  | @ -877,16 +998,64 @@ __bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) | |||
|  * | ||||
|  * Unavailable if ops.update_idle() is implemented and | ||||
|  * %SCX_OPS_KEEP_BUILTIN_IDLE is not set. | ||||
|  * | ||||
|  * Always returns an error if %SCX_OPS_BUILTIN_IDLE_PER_NODE is set, use | ||||
|  * scx_bpf_pick_idle_cpu_node() instead. | ||||
|  */ | ||||
| __bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed, | ||||
| 				      u64 flags) | ||||
| { | ||||
| 	if (static_branch_maybe(CONFIG_NUMA, &scx_builtin_idle_per_node)) { | ||||
| 		scx_ops_error("per-node idle tracking is enabled"); | ||||
| 		return -EBUSY; | ||||
| 	} | ||||
| 
 | ||||
| 	if (!check_builtin_idle_enabled()) | ||||
| 		return -EBUSY; | ||||
| 
 | ||||
| 	return scx_pick_idle_cpu(cpus_allowed, NUMA_NO_NODE, flags); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * scx_bpf_pick_any_cpu_node - Pick and claim an idle cpu if available | ||||
|  *			       or pick any CPU from @node | ||||
|  * @cpus_allowed: Allowed cpumask | ||||
|  * @node: target NUMA node | ||||
|  * @flags: %SCX_PICK_IDLE_CPU_* flags | ||||
|  * | ||||
|  * Pick and claim an idle cpu in @cpus_allowed. If none is available, pick any | ||||
|  * CPU in @cpus_allowed. Guaranteed to succeed and returns the picked idle cpu | ||||
|  * number if @cpus_allowed is not empty. -%EBUSY is returned if @cpus_allowed is | ||||
|  * empty. | ||||
|  * | ||||
|  * The search starts from @node and proceeds to other online NUMA nodes in | ||||
|  * order of increasing distance (unless SCX_PICK_IDLE_IN_NODE is specified, | ||||
|  * in which case the search is limited to the target @node). | ||||
|  * | ||||
|  * If ops.update_idle() is implemented and %SCX_OPS_KEEP_BUILTIN_IDLE is not | ||||
|  * set, this function can't tell which CPUs are idle and will always pick any | ||||
|  * CPU. | ||||
|  */ | ||||
| __bpf_kfunc s32 scx_bpf_pick_any_cpu_node(const struct cpumask *cpus_allowed, | ||||
| 					  int node, u64 flags) | ||||
| { | ||||
| 	s32 cpu; | ||||
| 
 | ||||
| 	node = validate_node(node); | ||||
| 	if (node < 0) | ||||
| 		return node; | ||||
| 
 | ||||
| 	cpu = scx_pick_idle_cpu(cpus_allowed, node, flags); | ||||
| 	if (cpu >= 0) | ||||
| 		return cpu; | ||||
| 
 | ||||
| 	cpu = cpumask_any_distribute(cpus_allowed); | ||||
| 	if (cpu < nr_cpu_ids) | ||||
| 		return cpu; | ||||
| 	else | ||||
| 		return -EBUSY; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * scx_bpf_pick_any_cpu - Pick and claim an idle cpu if available or pick any CPU | ||||
|  * @cpus_allowed: Allowed cpumask | ||||
|  | @ -900,12 +1069,20 @@ __bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed, | |||
|  * If ops.update_idle() is implemented and %SCX_OPS_KEEP_BUILTIN_IDLE is not | ||||
|  * set, this function can't tell which CPUs are idle and will always pick any | ||||
|  * CPU. | ||||
|  * | ||||
|  * Always returns an error if %SCX_OPS_BUILTIN_IDLE_PER_NODE is set, use | ||||
|  * scx_bpf_pick_any_cpu_node() instead. | ||||
|  */ | ||||
| __bpf_kfunc s32 scx_bpf_pick_any_cpu(const struct cpumask *cpus_allowed, | ||||
| 				     u64 flags) | ||||
| { | ||||
| 	s32 cpu; | ||||
| 
 | ||||
| 	if (static_branch_maybe(CONFIG_NUMA, &scx_builtin_idle_per_node)) { | ||||
| 		scx_ops_error("per-node idle tracking is enabled"); | ||||
| 		return -EBUSY; | ||||
| 	} | ||||
| 
 | ||||
| 	if (static_branch_likely(&scx_builtin_idle_enabled)) { | ||||
| 		cpu = scx_pick_idle_cpu(cpus_allowed, NUMA_NO_NODE, flags); | ||||
| 		if (cpu >= 0) | ||||
|  | @ -922,11 +1099,16 @@ __bpf_kfunc s32 scx_bpf_pick_any_cpu(const struct cpumask *cpus_allowed, | |||
| __bpf_kfunc_end_defs(); | ||||
| 
 | ||||
| BTF_KFUNCS_START(scx_kfunc_ids_idle) | ||||
| BTF_ID_FLAGS(func, scx_bpf_cpu_node) | ||||
| BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask_node, KF_ACQUIRE) | ||||
| BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask, KF_ACQUIRE) | ||||
| BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask_node, KF_ACQUIRE) | ||||
| BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask, KF_ACQUIRE) | ||||
| BTF_ID_FLAGS(func, scx_bpf_put_idle_cpumask, KF_RELEASE) | ||||
| BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle) | ||||
| BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu_node, KF_RCU) | ||||
| BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU) | ||||
| BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu_node, KF_RCU) | ||||
| BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU) | ||||
| BTF_KFUNCS_END(scx_kfunc_ids_idle) | ||||
| 
 | ||||
|  |  | |||
|  | @ -71,14 +71,19 @@ u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym __weak; | |||
| u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym __weak; | ||||
| void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym __weak; | ||||
| u32 scx_bpf_nr_cpu_ids(void) __ksym __weak; | ||||
| int scx_bpf_cpu_node(s32 cpu) __ksym __weak; | ||||
| const struct cpumask *scx_bpf_get_possible_cpumask(void) __ksym __weak; | ||||
| const struct cpumask *scx_bpf_get_online_cpumask(void) __ksym __weak; | ||||
| void scx_bpf_put_cpumask(const struct cpumask *cpumask) __ksym __weak; | ||||
| const struct cpumask *scx_bpf_get_idle_cpumask_node(int node) __ksym __weak; | ||||
| const struct cpumask *scx_bpf_get_idle_cpumask(void) __ksym; | ||||
| const struct cpumask *scx_bpf_get_idle_smtmask_node(int node) __ksym __weak; | ||||
| const struct cpumask *scx_bpf_get_idle_smtmask(void) __ksym; | ||||
| void scx_bpf_put_idle_cpumask(const struct cpumask *cpumask) __ksym; | ||||
| bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) __ksym; | ||||
| s32 scx_bpf_pick_idle_cpu_node(const cpumask_t *cpus_allowed, int node, u64 flags) __ksym __weak; | ||||
| s32 scx_bpf_pick_idle_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym; | ||||
| s32 scx_bpf_pick_any_cpu_node(const cpumask_t *cpus_allowed, int node, u64 flags) __ksym __weak; | ||||
| s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym; | ||||
| bool scx_bpf_task_running(const struct task_struct *p) __ksym; | ||||
| s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym; | ||||
|  |  | |||
|  | @ -182,6 +182,37 @@ static inline bool __COMPAT_is_enq_cpu_selected(u64 enq_flags) | |||
| 	 scx_bpf_now() :							\ | ||||
| 	 bpf_ktime_get_ns()) | ||||
| 
 | ||||
| /*
 | ||||
|  * | ||||
|  * v6.15: Introduce NUMA-aware kfuncs to operate with per-node idle | ||||
|  * cpumasks. | ||||
|  * | ||||
|  * Preserve the following __COMPAT_scx_*_node macros until v6.17. | ||||
|  */ | ||||
| #define __COMPAT_scx_bpf_cpu_node(cpu)						\ | ||||
| 	(bpf_ksym_exists(scx_bpf_cpu_node) ?					\ | ||||
| 	 scx_bpf_cpu_node(cpu) : 0) | ||||
| 
 | ||||
| #define __COMPAT_scx_bpf_get_idle_cpumask_node(node)				\ | ||||
| 	(bpf_ksym_exists(scx_bpf_get_idle_cpumask_node) ?			\ | ||||
| 	 scx_bpf_get_idle_cpumask_node(node) :					\ | ||||
| 	 scx_bpf_get_idle_cpumask())						\ | ||||
| 
 | ||||
| #define __COMPAT_scx_bpf_get_idle_smtmask_node(node)				\ | ||||
| 	(bpf_ksym_exists(scx_bpf_get_idle_smtmask_node) ?			\ | ||||
| 	 scx_bpf_get_idle_smtmask_node(node) :					\ | ||||
| 	 scx_bpf_get_idle_smtmask()) | ||||
| 
 | ||||
| #define __COMPAT_scx_bpf_pick_idle_cpu_node(cpus_allowed, node, flags)		\ | ||||
| 	(bpf_ksym_exists(scx_bpf_pick_idle_cpu_node) ?				\ | ||||
| 	 scx_bpf_pick_idle_cpu_node(cpus_allowed, node, flags) :		\ | ||||
| 	 scx_bpf_pick_idle_cpu(cpus_allowed, flags)) | ||||
| 
 | ||||
| #define __COMPAT_scx_bpf_pick_any_cpu_node(cpus_allowed, node, flags)		\ | ||||
| 	(bpf_ksym_exists(scx_bpf_pick_any_cpu_node) ?				\ | ||||
| 	 scx_bpf_pick_any_cpu_node(cpus_allowed, node, flags) :			\ | ||||
| 	 scx_bpf_pick_any_cpu(cpus_allowed, flags)) | ||||
| 
 | ||||
| /*
 | ||||
|  * Define sched_ext_ops. This may be expanded to define multiple variants for | ||||
|  * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH(). | ||||
|  |  | |||
Author: Andrea Righi