mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	psi: eliminate kthread_worker from psi trigger scheduling mechanism
Each psi group requires a dedicated kthread_delayed_work and kthread_worker. Since no other work can be performed using psi_group's kthread_worker, the same result can be obtained using a task_struct and a timer directly. This makes psi triggering simpler by removing lists and locks involved with kthread_worker usage and eliminates the need for poll_scheduled atomic use in the hot path. Signed-off-by: Suren Baghdasaryan <surenb@google.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lkml.kernel.org/r/20200528195442.190116-1-surenb@google.com
This commit is contained in:
		
							parent
							
								
									f4291df103
								
							
						
					
					
						commit
						461daba06b
					
				
					 2 changed files with 68 additions and 52 deletions
				
			
		| 
						 | 
				
			
			@ -153,9 +153,10 @@ struct psi_group {
 | 
			
		|||
	unsigned long avg[NR_PSI_STATES - 1][3];
 | 
			
		||||
 | 
			
		||||
	/* Monitor work control */
 | 
			
		||||
	atomic_t poll_scheduled;
 | 
			
		||||
	struct kthread_worker __rcu *poll_kworker;
 | 
			
		||||
	struct kthread_delayed_work poll_work;
 | 
			
		||||
	struct task_struct __rcu *poll_task;
 | 
			
		||||
	struct timer_list poll_timer;
 | 
			
		||||
	wait_queue_head_t poll_wait;
 | 
			
		||||
	atomic_t poll_wakeup;
 | 
			
		||||
 | 
			
		||||
	/* Protects data used by the monitor */
 | 
			
		||||
	struct mutex trigger_lock;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -190,7 +190,6 @@ static void group_init(struct psi_group *group)
 | 
			
		|||
	INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
 | 
			
		||||
	mutex_init(&group->avgs_lock);
 | 
			
		||||
	/* Init trigger-related members */
 | 
			
		||||
	atomic_set(&group->poll_scheduled, 0);
 | 
			
		||||
	mutex_init(&group->trigger_lock);
 | 
			
		||||
	INIT_LIST_HEAD(&group->triggers);
 | 
			
		||||
	memset(group->nr_triggers, 0, sizeof(group->nr_triggers));
 | 
			
		||||
| 
						 | 
				
			
			@ -199,7 +198,7 @@ static void group_init(struct psi_group *group)
 | 
			
		|||
	memset(group->polling_total, 0, sizeof(group->polling_total));
 | 
			
		||||
	group->polling_next_update = ULLONG_MAX;
 | 
			
		||||
	group->polling_until = 0;
 | 
			
		||||
	rcu_assign_pointer(group->poll_kworker, NULL);
 | 
			
		||||
	rcu_assign_pointer(group->poll_task, NULL);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void __init psi_init(void)
 | 
			
		||||
| 
						 | 
				
			
			@ -547,47 +546,38 @@ static u64 update_triggers(struct psi_group *group, u64 now)
 | 
			
		|||
	return now + group->poll_min_period;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Schedule polling if it's not already scheduled. It's safe to call even from
 | 
			
		||||
 * hotpath because even though kthread_queue_delayed_work takes worker->lock
 | 
			
		||||
 * spinlock that spinlock is never contended due to poll_scheduled atomic
 | 
			
		||||
 * preventing such competition.
 | 
			
		||||
 */
 | 
			
		||||
/* Schedule polling if it's not already scheduled. */
 | 
			
		||||
static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay)
 | 
			
		||||
{
 | 
			
		||||
	struct kthread_worker *kworker;
 | 
			
		||||
	struct task_struct *task;
 | 
			
		||||
 | 
			
		||||
	/* Do not reschedule if already scheduled */
 | 
			
		||||
	if (atomic_cmpxchg(&group->poll_scheduled, 0, 1) != 0)
 | 
			
		||||
	/*
 | 
			
		||||
	 * Do not reschedule if already scheduled.
 | 
			
		||||
	 * Possible race with a timer scheduled after this check but before
 | 
			
		||||
	 * mod_timer below can be tolerated because group->polling_next_update
 | 
			
		||||
	 * will keep updates on schedule.
 | 
			
		||||
	 */
 | 
			
		||||
	if (timer_pending(&group->poll_timer))
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
 | 
			
		||||
	kworker = rcu_dereference(group->poll_kworker);
 | 
			
		||||
	task = rcu_dereference(group->poll_task);
 | 
			
		||||
	/*
 | 
			
		||||
	 * kworker might be NULL in case psi_trigger_destroy races with
 | 
			
		||||
	 * psi_task_change (hotpath) which can't use locks
 | 
			
		||||
	 */
 | 
			
		||||
	if (likely(kworker))
 | 
			
		||||
		kthread_queue_delayed_work(kworker, &group->poll_work, delay);
 | 
			
		||||
	else
 | 
			
		||||
		atomic_set(&group->poll_scheduled, 0);
 | 
			
		||||
	if (likely(task))
 | 
			
		||||
		mod_timer(&group->poll_timer, jiffies + delay);
 | 
			
		||||
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void psi_poll_work(struct kthread_work *work)
 | 
			
		||||
static void psi_poll_work(struct psi_group *group)
 | 
			
		||||
{
 | 
			
		||||
	struct kthread_delayed_work *dwork;
 | 
			
		||||
	struct psi_group *group;
 | 
			
		||||
	u32 changed_states;
 | 
			
		||||
	u64 now;
 | 
			
		||||
 | 
			
		||||
	dwork = container_of(work, struct kthread_delayed_work, work);
 | 
			
		||||
	group = container_of(dwork, struct psi_group, poll_work);
 | 
			
		||||
 | 
			
		||||
	atomic_set(&group->poll_scheduled, 0);
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&group->trigger_lock);
 | 
			
		||||
 | 
			
		||||
	now = sched_clock();
 | 
			
		||||
| 
						 | 
				
			
			@ -623,6 +613,35 @@ static void psi_poll_work(struct kthread_work *work)
 | 
			
		|||
	mutex_unlock(&group->trigger_lock);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int psi_poll_worker(void *data)
 | 
			
		||||
{
 | 
			
		||||
	struct psi_group *group = (struct psi_group *)data;
 | 
			
		||||
	struct sched_param param = {
 | 
			
		||||
		.sched_priority = 1,
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m);
 | 
			
		||||
 | 
			
		||||
	while (true) {
 | 
			
		||||
		wait_event_interruptible(group->poll_wait,
 | 
			
		||||
				atomic_cmpxchg(&group->poll_wakeup, 1, 0) ||
 | 
			
		||||
				kthread_should_stop());
 | 
			
		||||
		if (kthread_should_stop())
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
		psi_poll_work(group);
 | 
			
		||||
	}
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void poll_timer_fn(struct timer_list *t)
 | 
			
		||||
{
 | 
			
		||||
	struct psi_group *group = from_timer(group, t, poll_timer);
 | 
			
		||||
 | 
			
		||||
	atomic_set(&group->poll_wakeup, 1);
 | 
			
		||||
	wake_up_interruptible(&group->poll_wait);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void record_times(struct psi_group_cpu *groupc, int cpu,
 | 
			
		||||
			 bool memstall_tick)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -1099,22 +1118,20 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
 | 
			
		|||
 | 
			
		||||
	mutex_lock(&group->trigger_lock);
 | 
			
		||||
 | 
			
		||||
	if (!rcu_access_pointer(group->poll_kworker)) {
 | 
			
		||||
		struct sched_param param = {
 | 
			
		||||
			.sched_priority = 1,
 | 
			
		||||
		};
 | 
			
		||||
		struct kthread_worker *kworker;
 | 
			
		||||
	if (!rcu_access_pointer(group->poll_task)) {
 | 
			
		||||
		struct task_struct *task;
 | 
			
		||||
 | 
			
		||||
		kworker = kthread_create_worker(0, "psimon");
 | 
			
		||||
		if (IS_ERR(kworker)) {
 | 
			
		||||
		task = kthread_create(psi_poll_worker, group, "psimon");
 | 
			
		||||
		if (IS_ERR(task)) {
 | 
			
		||||
			kfree(t);
 | 
			
		||||
			mutex_unlock(&group->trigger_lock);
 | 
			
		||||
			return ERR_CAST(kworker);
 | 
			
		||||
			return ERR_CAST(task);
 | 
			
		||||
		}
 | 
			
		||||
		sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, ¶m);
 | 
			
		||||
		kthread_init_delayed_work(&group->poll_work,
 | 
			
		||||
				psi_poll_work);
 | 
			
		||||
		rcu_assign_pointer(group->poll_kworker, kworker);
 | 
			
		||||
		atomic_set(&group->poll_wakeup, 0);
 | 
			
		||||
		init_waitqueue_head(&group->poll_wait);
 | 
			
		||||
		wake_up_process(task);
 | 
			
		||||
		timer_setup(&group->poll_timer, poll_timer_fn, 0);
 | 
			
		||||
		rcu_assign_pointer(group->poll_task, task);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	list_add(&t->node, &group->triggers);
 | 
			
		||||
| 
						 | 
				
			
			@ -1132,7 +1149,7 @@ static void psi_trigger_destroy(struct kref *ref)
 | 
			
		|||
{
 | 
			
		||||
	struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
 | 
			
		||||
	struct psi_group *group = t->group;
 | 
			
		||||
	struct kthread_worker *kworker_to_destroy = NULL;
 | 
			
		||||
	struct task_struct *task_to_destroy = NULL;
 | 
			
		||||
 | 
			
		||||
	if (static_branch_likely(&psi_disabled))
 | 
			
		||||
		return;
 | 
			
		||||
| 
						 | 
				
			
			@ -1158,13 +1175,13 @@ static void psi_trigger_destroy(struct kref *ref)
 | 
			
		|||
			period = min(period, div_u64(tmp->win.size,
 | 
			
		||||
					UPDATES_PER_WINDOW));
 | 
			
		||||
		group->poll_min_period = period;
 | 
			
		||||
		/* Destroy poll_kworker when the last trigger is destroyed */
 | 
			
		||||
		/* Destroy poll_task when the last trigger is destroyed */
 | 
			
		||||
		if (group->poll_states == 0) {
 | 
			
		||||
			group->polling_until = 0;
 | 
			
		||||
			kworker_to_destroy = rcu_dereference_protected(
 | 
			
		||||
					group->poll_kworker,
 | 
			
		||||
			task_to_destroy = rcu_dereference_protected(
 | 
			
		||||
					group->poll_task,
 | 
			
		||||
					lockdep_is_held(&group->trigger_lock));
 | 
			
		||||
			rcu_assign_pointer(group->poll_kworker, NULL);
 | 
			
		||||
			rcu_assign_pointer(group->poll_task, NULL);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1172,25 +1189,23 @@ static void psi_trigger_destroy(struct kref *ref)
 | 
			
		|||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Wait for both *trigger_ptr from psi_trigger_replace and
 | 
			
		||||
	 * poll_kworker RCUs to complete their read-side critical sections
 | 
			
		||||
	 * before destroying the trigger and optionally the poll_kworker
 | 
			
		||||
	 * poll_task RCUs to complete their read-side critical sections
 | 
			
		||||
	 * before destroying the trigger and optionally the poll_task
 | 
			
		||||
	 */
 | 
			
		||||
	synchronize_rcu();
 | 
			
		||||
	/*
 | 
			
		||||
	 * Destroy the kworker after releasing trigger_lock to prevent a
 | 
			
		||||
	 * deadlock while waiting for psi_poll_work to acquire trigger_lock
 | 
			
		||||
	 */
 | 
			
		||||
	if (kworker_to_destroy) {
 | 
			
		||||
	if (task_to_destroy) {
 | 
			
		||||
		/*
 | 
			
		||||
		 * After the RCU grace period has expired, the worker
 | 
			
		||||
		 * can no longer be found through group->poll_kworker.
 | 
			
		||||
		 * can no longer be found through group->poll_task.
 | 
			
		||||
		 * But it might have been already scheduled before
 | 
			
		||||
		 * that - deschedule it cleanly before destroying it.
 | 
			
		||||
		 */
 | 
			
		||||
		kthread_cancel_delayed_work_sync(&group->poll_work);
 | 
			
		||||
		atomic_set(&group->poll_scheduled, 0);
 | 
			
		||||
 | 
			
		||||
		kthread_destroy_worker(kworker_to_destroy);
 | 
			
		||||
		del_timer_sync(&group->poll_timer);
 | 
			
		||||
		kthread_stop(task_to_destroy);
 | 
			
		||||
	}
 | 
			
		||||
	kfree(t);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue