mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	psi: Reduce calls to sched_clock() in psi
We noticed that the cost of psi grows with the depth of the cgroup
hierarchy. In particular, the cost of cpu_clock() stands out, because the
kernel calls it once for every group as it traverses up the cgroup tree.
This patch reads the clock once per task change or task switch and passes
the value down to each group, reducing the number of cpu_clock() calls.
We ran perf bench on an Intel Broadwell machine with 3 levels of cgroups.
Before the patch:
$ perf bench sched all
 # Running sched/messaging benchmark...
 # 20 sender and receiver processes per group
 # 10 groups == 400 processes run
     Total time: 0.747 [sec]
 # Running sched/pipe benchmark...
 # Executed 1000000 pipe operations between two processes
     Total time: 3.516 [sec]
       3.516689 usecs/op
         284358 ops/sec
After the patch:
$ perf bench sched all
 # Running sched/messaging benchmark...
 # 20 sender and receiver processes per group
 # 10 groups == 400 processes run
     Total time: 0.640 [sec]
 # Running sched/pipe benchmark...
 # Executed 1000000 pipe operations between two processes
     Total time: 3.329 [sec]
       3.329820 usecs/op
         300316 ops/sec
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lkml.kernel.org/r/20210321205156.4186483-1-shakeelb@google.com
			
			
This commit is contained in:
		
							parent
							
								
									2a2f80ff63
								
							
						
					
					
						commit
						df77430639
					
				
					 1 changed files with 10 additions and 9 deletions
				
			
		| 
						 | 
				
			
			@ -644,12 +644,10 @@ static void poll_timer_fn(struct timer_list *t)
 | 
			
		|||
	wake_up_interruptible(&group->poll_wait);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void record_times(struct psi_group_cpu *groupc, int cpu)
 | 
			
		||||
static void record_times(struct psi_group_cpu *groupc, u64 now)
 | 
			
		||||
{
 | 
			
		||||
	u32 delta;
 | 
			
		||||
	u64 now;
 | 
			
		||||
 | 
			
		||||
	now = cpu_clock(cpu);
 | 
			
		||||
	delta = now - groupc->state_start;
 | 
			
		||||
	groupc->state_start = now;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -676,7 +674,7 @@ static void record_times(struct psi_group_cpu *groupc, int cpu)
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
static void psi_group_change(struct psi_group *group, int cpu,
 | 
			
		||||
			     unsigned int clear, unsigned int set,
 | 
			
		||||
			     unsigned int clear, unsigned int set, u64 now,
 | 
			
		||||
			     bool wake_clock)
 | 
			
		||||
{
 | 
			
		||||
	struct psi_group_cpu *groupc;
 | 
			
		||||
| 
						 | 
				
			
			@ -696,7 +694,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
 | 
			
		|||
	 */
 | 
			
		||||
	write_seqcount_begin(&groupc->seq);
 | 
			
		||||
 | 
			
		||||
	record_times(groupc, cpu);
 | 
			
		||||
	record_times(groupc, now);
 | 
			
		||||
 | 
			
		||||
	for (t = 0, m = clear; m; m &= ~(1 << t), t++) {
 | 
			
		||||
		if (!(m & (1 << t)))
 | 
			
		||||
| 
						 | 
				
			
			@ -788,12 +786,14 @@ void psi_task_change(struct task_struct *task, int clear, int set)
 | 
			
		|||
	struct psi_group *group;
 | 
			
		||||
	bool wake_clock = true;
 | 
			
		||||
	void *iter = NULL;
 | 
			
		||||
	u64 now;
 | 
			
		||||
 | 
			
		||||
	if (!task->pid)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	psi_flags_change(task, clear, set);
 | 
			
		||||
 | 
			
		||||
	now = cpu_clock(cpu);
 | 
			
		||||
	/*
 | 
			
		||||
	 * Periodic aggregation shuts off if there is a period of no
 | 
			
		||||
	 * task changes, so we wake it back up if necessary. However,
 | 
			
		||||
| 
						 | 
				
			
			@ -806,7 +806,7 @@ void psi_task_change(struct task_struct *task, int clear, int set)
 | 
			
		|||
		wake_clock = false;
 | 
			
		||||
 | 
			
		||||
	while ((group = iterate_groups(task, &iter)))
 | 
			
		||||
		psi_group_change(group, cpu, clear, set, wake_clock);
 | 
			
		||||
		psi_group_change(group, cpu, clear, set, now, wake_clock);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 | 
			
		||||
| 
						 | 
				
			
			@ -815,6 +815,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 | 
			
		|||
	struct psi_group *group, *common = NULL;
 | 
			
		||||
	int cpu = task_cpu(prev);
 | 
			
		||||
	void *iter;
 | 
			
		||||
	u64 now = cpu_clock(cpu);
 | 
			
		||||
 | 
			
		||||
	if (next->pid) {
 | 
			
		||||
		bool identical_state;
 | 
			
		||||
| 
						 | 
				
			
			@ -836,7 +837,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 | 
			
		|||
				break;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			psi_group_change(group, cpu, 0, TSK_ONCPU, true);
 | 
			
		||||
			psi_group_change(group, cpu, 0, TSK_ONCPU, now, true);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -858,7 +859,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 | 
			
		|||
 | 
			
		||||
		iter = NULL;
 | 
			
		||||
		while ((group = iterate_groups(prev, &iter)) && group != common)
 | 
			
		||||
			psi_group_change(group, cpu, clear, set, true);
 | 
			
		||||
			psi_group_change(group, cpu, clear, set, now, true);
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * TSK_ONCPU is handled up to the common ancestor. If we're tasked
 | 
			
		||||
| 
						 | 
				
			
			@ -867,7 +868,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 | 
			
		|||
		if (sleep) {
 | 
			
		||||
			clear &= ~TSK_ONCPU;
 | 
			
		||||
			for (; group; group = iterate_groups(prev, &iter))
 | 
			
		||||
				psi_group_change(group, cpu, clear, set, true);
 | 
			
		||||
				psi_group_change(group, cpu, clear, set, now, true);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue