sched/vtime: Bring up complete kcpustat accessor
Many callsites want to fetch the values of the system, user, user_nice,
guest or guest_nice kcpustat fields altogether, or at least a pair of
them. In that case, calling kcpustat_field() once per requested field
brings unnecessary overhead when we could fetch all of them in one go.

So provide kcpustat_cpu_fetch() that fetches the whole kcpustat array
in a vtime-safe way, under the same RCU and seqcount block.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Link: https://lkml.kernel.org/r/20191121024430.19938-3-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

parent 5a1c95580f
commit 74722bb223

2 changed files with 123 additions and 20 deletions
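To make the intent concrete before the diffs, here is a minimal sketch of a caller. The helper name and the choice of fields are hypothetical, not part of this commit; kcpustat_cpu_fetch() and the CPUTIME_* indices are the ones the diffs below introduce or already use (declared in linux/kernel_stat.h).

/*
 * Hypothetical caller: sum user + nice + system time of one CPU.
 * One vtime-safe snapshot replaces three kcpustat_field() calls,
 * each of which would take its own RCU + seqcount round trip.
 */
static u64 cpu_user_nice_system(int cpu)
{
	struct kernel_cpustat kcpustat;
	u64 *cpustat = kcpustat.cpustat;

	kcpustat_cpu_fetch(&kcpustat, cpu);

	return cpustat[CPUTIME_USER] +
	       cpustat[CPUTIME_NICE] +
	       cpustat[CPUTIME_SYSTEM];
}

Note that with CONFIG_VIRT_CPU_ACCOUNTING_GEN disabled, the accessor degrades to a plain structure copy (the static inline fallback in the header diff below), so such callers pay nothing extra.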
include/linux/kernel_stat.h:

@@ -81,12 +81,19 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern u64 kcpustat_field(struct kernel_cpustat *kcpustat,
 			  enum cpu_usage_stat usage, int cpu);
+extern void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu);
 #else
 static inline u64 kcpustat_field(struct kernel_cpustat *kcpustat,
 				 enum cpu_usage_stat usage, int cpu)
 {
 	return kcpustat->cpustat[usage];
 }
+
+static inline void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
+{
+	*dst = kcpustat_cpu(cpu);
+}
+
 #endif
 
 extern void account_user_time(struct task_struct *, u64);

kernel/sched/cputime.c:

@@ -912,6 +912,30 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
 }
 
+static int vtime_state_check(struct vtime *vtime, int cpu)
+{
+	/*
+	 * We raced against a context switch, fetch the
+	 * kcpustat task again.
+	 */
+	if (vtime->cpu != cpu && vtime->cpu != -1)
+		return -EAGAIN;
+
+	/*
+	 * Two possible things here:
+	 * 1) We are seeing the scheduling out task (prev) or any past one.
+	 * 2) We are seeing the scheduling in task (next) but it hasn't
+	 *    passed though vtime_task_switch() yet so the pending
+	 *    cputime of the prev task may not be flushed yet.
+	 *
+	 * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
+	 */
+	if (vtime->state == VTIME_INACTIVE)
+		return -EAGAIN;
+
+	return 0;
+}
+
 static u64 kcpustat_user_vtime(struct vtime *vtime)
 {
 	if (vtime->state == VTIME_USER)

@@ -933,26 +957,9 @@ static int kcpustat_field_vtime(u64 *cpustat,
 	do {
 		seq = read_seqcount_begin(&vtime->seqcount);
 
-		/*
-		 * We raced against context switch, fetch the
-		 * kcpustat task again.
-		 */
-		if (vtime->cpu != cpu && vtime->cpu != -1)
-			return -EAGAIN;
-
-		/*
-		 * Two possible things here:
-		 * 1) We are seeing the scheduling out task (prev) or any past one.
-		 * 2) We are seeing the scheduling in task (next) but it hasn't
-		 *    passed though vtime_task_switch() yet so the pending
-		 *    cputime of the prev task may not be flushed yet.
-		 *
-		 * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
-		 */
-		if (vtime->state == VTIME_INACTIVE)
-			return -EAGAIN;
-
-		err = 0;
+		err = vtime_state_check(vtime, cpu);
+		if (err < 0)
+			return err;
 
 		*val = cpustat[usage];
 

@@ -1025,4 +1032,93 @@ u64 kcpustat_field(struct kernel_cpustat *kcpustat,
 	}
 }
 EXPORT_SYMBOL_GPL(kcpustat_field);
+
+static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
+				    const struct kernel_cpustat *src,
+				    struct task_struct *tsk, int cpu)
+{
+	struct vtime *vtime = &tsk->vtime;
+	unsigned int seq;
+	int err;
+
+	do {
+		u64 *cpustat;
+		u64 delta;
+
+		seq = read_seqcount_begin(&vtime->seqcount);
+
+		err = vtime_state_check(vtime, cpu);
+		if (err < 0)
+			return err;
+
+		*dst = *src;
+		cpustat = dst->cpustat;
+
+		/* Task is sleeping, dead or idle, nothing to add */
+		if (vtime->state < VTIME_SYS)
+			continue;
+
+		delta = vtime_delta(vtime);
+
+		/*
+		 * Task runs either in user (including guest) or kernel space,
+		 * add pending nohz time to the right place.
+		 */
+		if (vtime->state == VTIME_SYS) {
+			cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
+		} else if (vtime->state == VTIME_USER) {
+			if (task_nice(tsk) > 0)
+				cpustat[CPUTIME_NICE] += vtime->utime + delta;
+			else
+				cpustat[CPUTIME_USER] += vtime->utime + delta;
+		} else {
+			WARN_ON_ONCE(vtime->state != VTIME_GUEST);
+			if (task_nice(tsk) > 0) {
+				cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta;
+				cpustat[CPUTIME_NICE] += vtime->gtime + delta;
+			} else {
+				cpustat[CPUTIME_GUEST] += vtime->gtime + delta;
+				cpustat[CPUTIME_USER] += vtime->gtime + delta;
+			}
+		}
+	} while (read_seqcount_retry(&vtime->seqcount, seq));
+
+	return err;
+}
+
+void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
+{
+	const struct kernel_cpustat *src = &kcpustat_cpu(cpu);
+	struct rq *rq;
+	int err;
+
+	if (!vtime_accounting_enabled_cpu(cpu)) {
+		*dst = *src;
+		return;
+	}
+
+	rq = cpu_rq(cpu);
+
+	for (;;) {
+		struct task_struct *curr;
+
+		rcu_read_lock();
+		curr = rcu_dereference(rq->curr);
+		if (WARN_ON_ONCE(!curr)) {
+			rcu_read_unlock();
+			*dst = *src;
+			return;
+		}
+
+		err = kcpustat_cpu_fetch_vtime(dst, src, curr, cpu);
+		rcu_read_unlock();
+
+		if (!err)
+			return;
+
+		cpu_relax();
+	}
+}
+EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch);
+
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
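Because each fetch is an internally consistent snapshot (taken under one RCU and seqcount block), a caller can diff two fetches of the same CPU to derive usage over an interval. A hedged sketch follows; the helper below is hypothetical, not part of this commit or the kernel tree:

/*
 * Hypothetical consumer: cputime accumulated on one CPU between two
 * kcpustat_cpu_fetch() snapshots taken some interval apart.
 */
static u64 cpu_busy_delta(const struct kernel_cpustat *prev,
			  const struct kernel_cpustat *next)
{
	const u64 *p = prev->cpustat;
	const u64 *n = next->cpustat;

	return (n[CPUTIME_USER]   - p[CPUTIME_USER]) +
	       (n[CPUTIME_NICE]   - p[CPUTIME_NICE]) +
	       (n[CPUTIME_SYSTEM] - p[CPUTIME_SYSTEM]);
}

A caller would kcpustat_cpu_fetch(&prev, cpu), wait, fetch again into next, and pass both here; /proc/stat-style readers can aggregate across CPUs the same way.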