mirror of https://github.com/torvalds/linux.git
commit 3f020399e4
Merge tag 'sched-core-2024-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
 "Core facilities:
   - Add the "Lazy preemption" model (CONFIG_PREEMPT_LAZY=y), which
     optimizes fair-class preemption by delaying preemption requests to
     the tick boundary, while working as full preemption for
     RR/FIFO/DEADLINE classes. (Peter Zijlstra)
        - x86: Enable Lazy preemption (Peter Zijlstra)
        - riscv: Enable Lazy preemption (Jisheng Zhang)
   - Initialize idle tasks only once (Thomas Gleixner)
   - sched/ext: Remove sched_fork() hack (Thomas Gleixner)
  Fair scheduler:
   - Optimize the PLACE_LAG when se->vlag is zero (Huang Shijie)
  Idle loop:
   - Optimize the generic idle loop by removing unnecessary memory
     barrier (Zhongqiu Han)
  RSEQ:
   - Improve cache locality of RSEQ concurrency IDs for intermittent
     workloads (Mathieu Desnoyers)
  Waitqueues:
   - Make wake_up_{bit,var} less fragile (Neil Brown)
  PSI:
   - Pass enqueue/dequeue flags to psi callbacks directly (Johannes
     Weiner)
  Preparatory patches for proxy execution:
   - Add move_queued_task_locked helper (Connor O'Brien)
   - Consolidate pick_*_task to task_is_pushable helper (Connor O'Brien)
   - Split out __schedule() deactivate task logic into a helper (John
     Stultz)
   - Split scheduler and execution contexts (Peter Zijlstra)
   - Make mutex::wait_lock irq safe (Juri Lelli)
   - Expose __mutex_owner() (Juri Lelli)
   - Remove wakeups from under mutex::wait_lock (Peter Zijlstra)
  Misc fixes and cleanups:
   - Remove unused __HAVE_THREAD_FUNCTIONS hook support (David
     Disseldorp)
   - Update the comment for TIF_NEED_RESCHED_LAZY (Sebastian Andrzej
     Siewior)
   - Remove unused bit_wait_io_timeout (Dr. David Alan Gilbert)
   - remove the DOUBLE_TICK feature (Huang Shijie)
   - fix the comment for PREEMPT_SHORT (Huang Shijie)
   - Fix unused variable warning (Christian Loehle)
   - No PREEMPT_RT=y for all{yes,mod}config"
* tag 'sched-core-2024-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits)
  sched, x86: Update the comment for TIF_NEED_RESCHED_LAZY.
  sched: No PREEMPT_RT=y for all{yes,mod}config
  riscv: add PREEMPT_LAZY support
  sched, x86: Enable Lazy preemption
  sched: Enable PREEMPT_DYNAMIC for PREEMPT_RT
  sched: Add Lazy preemption model
  sched: Add TIF_NEED_RESCHED_LAZY infrastructure
  sched/ext: Remove sched_fork() hack
  sched: Initialize idle tasks only once
  sched: psi: pass enqueue/dequeue flags to psi callbacks directly
  sched/uclamp: Fix unused variable warning
  sched: Split scheduler and execution contexts
  sched: Split out __schedule() deactivate task logic into a helper
  sched: Consolidate pick_*_task to task_is_pushable helper
  sched: Add move_queued_task_locked helper
  locking/mutex: Expose __mutex_owner()
  locking/mutex: Make mutex::wait_lock irq safe
  locking/mutex: Remove wakeups from under mutex::wait_lock
  sched: Improve cache locality of RSEQ concurrency IDs for intermittent workloads
  sched: idle: Optimize the generic idle loop by removing needless memory barrier
  ...
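Concept sketch for the "Lazy preemption" item above: the following is a minimal, self-contained C illustration, not kernel code. The flag names and the helpers wakeup_preempt() and tick() are hypothetical stand-ins, used only to show the behaviour described in the changelog: a fair-class preemption request is merely recorded as a lazy flag and is upgraded to an immediate resched request no later than the next scheduler tick, while RR/FIFO/DEADLINE wakeups still request immediate preemption.

/* Standalone illustration of the lazy-preemption idea; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the two resched flags described above. */
#define NEED_RESCHED      0x1  /* preempt at the next preemption point */
#define NEED_RESCHED_LAZY 0x2  /* preempt no later than the next tick  */

enum sched_class { FAIR, RR_FIFO_DEADLINE };

struct cpu {
	unsigned int ti_flags;       /* thread-info flags of the running task */
	enum sched_class curr_class; /* class of the currently running task   */
};

/* A newly woken task wants the CPU: fair-on-fair wakeups only set the lazy
 * bit, RR/FIFO/DEADLINE wakeups request immediate preemption. */
static void wakeup_preempt(struct cpu *cpu, enum sched_class waking)
{
	if (waking == FAIR && cpu->curr_class == FAIR)
		cpu->ti_flags |= NEED_RESCHED_LAZY;
	else
		cpu->ti_flags |= NEED_RESCHED;
}

/* The scheduler tick upgrades a still-pending lazy request to a real one,
 * bounding the deferral to one tick. */
static void tick(struct cpu *cpu)
{
	if (cpu->ti_flags & NEED_RESCHED_LAZY)
		cpu->ti_flags |= NEED_RESCHED;
}

static bool should_preempt_now(const struct cpu *cpu)
{
	return cpu->ti_flags & NEED_RESCHED;
}

int main(void)
{
	struct cpu cpu = { .ti_flags = 0, .curr_class = FAIR };

	wakeup_preempt(&cpu, FAIR);
	printf("after fair wakeup: preempt now? %d\n", should_preempt_now(&cpu)); /* 0 */

	tick(&cpu);
	printf("after tick:        preempt now? %d\n", should_preempt_now(&cpu)); /* 1 */
	return 0;
}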
kernel/locking/spinlock_rt.c (287 lines, 7.1 KiB, C)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * PREEMPT_RT substitution for spin/rw_locks
 *
 * spinlocks and rwlocks on RT are based on rtmutexes, with a few twists to
 * resemble the non RT semantics:
 *
 * - Contrary to plain rtmutexes, spinlocks and rwlocks are state
 *   preserving. The task state is saved before blocking on the underlying
 *   rtmutex, and restored when the lock has been acquired. Regular wakeups
 *   during that time are redirected to the saved state so no wake up is
 *   missed.
 *
 * - Non RT spin/rwlocks disable preemption and eventually interrupts.
 *   Disabling preemption has the side effect of disabling migration and
 *   preventing RCU grace periods.
 *
 *   The RT substitutions explicitly disable migration and take
 *   rcu_read_lock() across the lock held section.
 */
#include <linux/spinlock.h>
#include <linux/export.h>

#define RT_MUTEX_BUILD_SPINLOCKS
#include "rtmutex.c"

/*
 * __might_resched() skips the state check as rtlocks are state
 * preserving. Take RCU nesting into account as spin/read/write_lock() can
 * legitimately nest into an RCU read side critical section.
 */
#define RTLOCK_RESCHED_OFFSETS						\
	(rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT)

#define rtlock_might_resched()						\
	__might_resched(__FILE__, __LINE__, RTLOCK_RESCHED_OFFSETS)

static __always_inline void rtlock_lock(struct rt_mutex_base *rtm)
{
	lockdep_assert(!current->pi_blocked_on);

	if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
		rtlock_slowlock(rtm);
}

static __always_inline void __rt_spin_lock(spinlock_t *lock)
{
	rtlock_might_resched();
	rtlock_lock(&lock->lock);
	rcu_read_lock();
	migrate_disable();
}

void __sched rt_spin_lock(spinlock_t *lock) __acquires(RCU)
{
	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
	__rt_spin_lock(lock);
}
EXPORT_SYMBOL(rt_spin_lock);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __sched rt_spin_lock_nested(spinlock_t *lock, int subclass)
{
	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
	__rt_spin_lock(lock);
}
EXPORT_SYMBOL(rt_spin_lock_nested);

void __sched rt_spin_lock_nest_lock(spinlock_t *lock,
				    struct lockdep_map *nest_lock)
{
	spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
	__rt_spin_lock(lock);
}
EXPORT_SYMBOL(rt_spin_lock_nest_lock);
#endif

void __sched rt_spin_unlock(spinlock_t *lock) __releases(RCU)
{
	spin_release(&lock->dep_map, _RET_IP_);
	migrate_enable();
	rcu_read_unlock();

	if (unlikely(!rt_mutex_cmpxchg_release(&lock->lock, current, NULL)))
		rt_mutex_slowunlock(&lock->lock);
}
EXPORT_SYMBOL(rt_spin_unlock);

/*
 * Wait for the lock to get unlocked: instead of polling for an unlock
 * (like raw spinlocks do), lock and unlock, to force the kernel to
 * schedule if there's contention:
 */
void __sched rt_spin_lock_unlock(spinlock_t *lock)
{
	spin_lock(lock);
	spin_unlock(lock);
}
EXPORT_SYMBOL(rt_spin_lock_unlock);

static __always_inline int __rt_spin_trylock(spinlock_t *lock)
{
	int ret = 1;

	if (unlikely(!rt_mutex_cmpxchg_acquire(&lock->lock, NULL, current)))
		ret = rt_mutex_slowtrylock(&lock->lock);

	if (ret) {
		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
		rcu_read_lock();
		migrate_disable();
	}
	return ret;
}

int __sched rt_spin_trylock(spinlock_t *lock)
{
	return __rt_spin_trylock(lock);
}
EXPORT_SYMBOL(rt_spin_trylock);

int __sched rt_spin_trylock_bh(spinlock_t *lock)
{
	int ret;

	local_bh_disable();
	ret = __rt_spin_trylock(lock);
	if (!ret)
		local_bh_enable();
	return ret;
}
EXPORT_SYMBOL(rt_spin_trylock_bh);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __rt_spin_lock_init(spinlock_t *lock, const char *name,
			 struct lock_class_key *key, bool percpu)
{
	u8 type = percpu ? LD_LOCK_PERCPU : LD_LOCK_NORMAL;

	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
	lockdep_init_map_type(&lock->dep_map, name, key, 0, LD_WAIT_CONFIG,
			      LD_WAIT_INV, type);
}
EXPORT_SYMBOL(__rt_spin_lock_init);
#endif

/*
 * RT-specific reader/writer locks
 */
#define rwbase_set_and_save_current_state(state)	\
	current_save_and_set_rtlock_wait_state()

#define rwbase_restore_current_state()			\
	current_restore_rtlock_saved_state()

static __always_inline int
rwbase_rtmutex_lock_state(struct rt_mutex_base *rtm, unsigned int state)
{
	if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
		rtlock_slowlock(rtm);
	return 0;
}

static __always_inline int
rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state,
			       struct wake_q_head *wake_q)
{
	rtlock_slowlock_locked(rtm, wake_q);
	return 0;
}

static __always_inline void rwbase_rtmutex_unlock(struct rt_mutex_base *rtm)
{
	if (likely(rt_mutex_cmpxchg_acquire(rtm, current, NULL)))
		return;

	rt_mutex_slowunlock(rtm);
}

static __always_inline int  rwbase_rtmutex_trylock(struct rt_mutex_base *rtm)
{
	if (likely(rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
		return 1;

	return rt_mutex_slowtrylock(rtm);
}

#define rwbase_signal_pending_state(state, current)	(0)

#define rwbase_pre_schedule()

#define rwbase_schedule()				\
	schedule_rtlock()

#define rwbase_post_schedule()

#include "rwbase_rt.c"
/*
 * The common functions which get wrapped into the rwlock API.
 */
int __sched rt_read_trylock(rwlock_t *rwlock)
{
	int ret;

	ret = rwbase_read_trylock(&rwlock->rwbase);
	if (ret) {
		rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);
		rcu_read_lock();
		migrate_disable();
	}
	return ret;
}
EXPORT_SYMBOL(rt_read_trylock);

int __sched rt_write_trylock(rwlock_t *rwlock)
{
	int ret;

	ret = rwbase_write_trylock(&rwlock->rwbase);
	if (ret) {
		rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
		rcu_read_lock();
		migrate_disable();
	}
	return ret;
}
EXPORT_SYMBOL(rt_write_trylock);

void __sched rt_read_lock(rwlock_t *rwlock) __acquires(RCU)
{
	rtlock_might_resched();
	rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
	rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
	rcu_read_lock();
	migrate_disable();
}
EXPORT_SYMBOL(rt_read_lock);

void __sched rt_write_lock(rwlock_t *rwlock) __acquires(RCU)
{
	rtlock_might_resched();
	rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
	rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
	rcu_read_lock();
	migrate_disable();
}
EXPORT_SYMBOL(rt_write_lock);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass) __acquires(RCU)
{
	rtlock_might_resched();
	rwlock_acquire(&rwlock->dep_map, subclass, 0, _RET_IP_);
	rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
	rcu_read_lock();
	migrate_disable();
}
EXPORT_SYMBOL(rt_write_lock_nested);
#endif

void __sched rt_read_unlock(rwlock_t *rwlock) __releases(RCU)
{
	rwlock_release(&rwlock->dep_map, _RET_IP_);
	migrate_enable();
	rcu_read_unlock();
	rwbase_read_unlock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
}
EXPORT_SYMBOL(rt_read_unlock);

void __sched rt_write_unlock(rwlock_t *rwlock) __releases(RCU)
{
	rwlock_release(&rwlock->dep_map, _RET_IP_);
	rcu_read_unlock();
	migrate_enable();
	rwbase_write_unlock(&rwlock->rwbase);
}
EXPORT_SYMBOL(rt_write_unlock);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __rt_rwlock_init(rwlock_t *rwlock, const char *name,
		      struct lock_class_key *key)
{
	debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
	lockdep_init_map_wait(&rwlock->dep_map, name, key, 0, LD_WAIT_CONFIG);
}
EXPORT_SYMBOL(__rt_rwlock_init);
#endif
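Usage note: the file above implements the PREEMPT_RT back end of the regular spinlock_t API. A short, hypothetical module sketch follows (demo_lock, demo_count and demo_init are made-up names; only the spin_lock()/spin_trylock()/spin_unlock() calls are the real API). It shows that callers keep using the ordinary API, which on PREEMPT_RT resolves to the rt_spin_lock()/rt_spin_unlock() functions above: the critical section may block on contention, but it runs with migration disabled and inside an RCU read-side section.

// Hypothetical example module: demo_lock, demo_count and demo_init are
// made up for illustration; only the spinlock_t API calls are real.
#include <linux/module.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);
static int demo_count;

static int __init demo_init(void)
{
	/* On PREEMPT_RT this maps to rt_spin_lock(): it may block if contended,
	 * with the task state preserved, and migration stays disabled while
	 * the lock is held. */
	spin_lock(&demo_lock);
	demo_count++;
	spin_unlock(&demo_lock);	/* -> rt_spin_unlock() on RT */

	/* Trylock never blocks on either configuration. */
	if (spin_trylock(&demo_lock)) {
		demo_count++;
		spin_unlock(&demo_lock);
	}
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");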