mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-03 18:20:25 +02:00 
			
		
		
		
	sched: add optional support for CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
this replaces the rq->clock stuff (and possibly cpu_clock()).
 - architectures that have an 'imperfect' hardware clock can set
   CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 - the 'jiffie' window might be superfulous when we update tick_gtod
   before the __update_sched_clock() call in sched_clock_tick()
 - cpu_clock() might be implemented as:
     sched_clock_cpu(smp_processor_id())
   if the accuracy proves good enough - how far can TSC drift in a
   single jiffie when considering the filtering and idle hooks?
[ mingo@elte.hu: various fixes and cleanups ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
			
			
This commit is contained in:
		
							parent
							
								
									a5574cf65b
								
							
						
					
					
						commit
						3e51f33fcc
					
				
					 7 changed files with 281 additions and 161 deletions
				
			
		| 
						 | 
					@ -1553,6 +1553,35 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern unsigned long long sched_clock(void);
 | 
					extern unsigned long long sched_clock(void);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 | 
				
			||||||
 | 
					static inline void sched_clock_init(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline u64 sched_clock_cpu(int cpu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return sched_clock();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void sched_clock_tick(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void sched_clock_idle_sleep_event(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					extern void sched_clock_init(void);
 | 
				
			||||||
 | 
					extern u64 sched_clock_cpu(int cpu);
 | 
				
			||||||
 | 
					extern void sched_clock_tick(void);
 | 
				
			||||||
 | 
					extern void sched_clock_idle_sleep_event(void);
 | 
				
			||||||
 | 
					extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
 | 
					 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
 | 
				
			||||||
 * clock constructed from sched_clock():
 | 
					 * clock constructed from sched_clock():
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -602,6 +602,7 @@ asmlinkage void __init start_kernel(void)
 | 
				
			||||||
	softirq_init();
 | 
						softirq_init();
 | 
				
			||||||
	timekeeping_init();
 | 
						timekeeping_init();
 | 
				
			||||||
	time_init();
 | 
						time_init();
 | 
				
			||||||
 | 
						sched_clock_init();
 | 
				
			||||||
	profile_init();
 | 
						profile_init();
 | 
				
			||||||
	if (!irqs_disabled())
 | 
						if (!irqs_disabled())
 | 
				
			||||||
		printk("start_kernel(): bug: interrupts were enabled early\n");
 | 
							printk("start_kernel(): bug: interrupts were enabled early\n");
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -9,7 +9,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 | 
				
			||||||
	    rcupdate.o extable.o params.o posix-timers.o \
 | 
						    rcupdate.o extable.o params.o posix-timers.o \
 | 
				
			||||||
	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 | 
						    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 | 
				
			||||||
	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 | 
						    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 | 
				
			||||||
	    notifier.o ksysfs.o pm_qos_params.o
 | 
						    notifier.o ksysfs.o pm_qos_params.o sched_clock.o
 | 
				
			||||||
 | 
					
 | 
				
			||||||
obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
 | 
					obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
 | 
				
			||||||
obj-$(CONFIG_STACKTRACE) += stacktrace.o
 | 
					obj-$(CONFIG_STACKTRACE) += stacktrace.o
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										165
									
								
								kernel/sched.c
									
									
									
									
									
								
							
							
						
						
									
										165
									
								
								kernel/sched.c
									
									
									
									
									
								
							| 
						 | 
					@ -74,16 +74,6 @@
 | 
				
			||||||
#include <asm/tlb.h>
 | 
					#include <asm/tlb.h>
 | 
				
			||||||
#include <asm/irq_regs.h>
 | 
					#include <asm/irq_regs.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
 * Scheduler clock - returns current time in nanosec units.
 | 
					 | 
				
			||||||
 * This is default implementation.
 | 
					 | 
				
			||||||
 * Architectures and sub-architectures can override this.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
unsigned long long __attribute__((weak)) sched_clock(void)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Convert user-nice values [ -20 ... 0 ... 19 ]
 | 
					 * Convert user-nice values [ -20 ... 0 ... 19 ]
 | 
				
			||||||
 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
 | 
					 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
 | 
				
			||||||
| 
						 | 
					@ -557,13 +547,7 @@ struct rq {
 | 
				
			||||||
	unsigned long next_balance;
 | 
						unsigned long next_balance;
 | 
				
			||||||
	struct mm_struct *prev_mm;
 | 
						struct mm_struct *prev_mm;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	u64 clock, prev_clock_raw;
 | 
						u64 clock;
 | 
				
			||||||
	s64 clock_max_delta;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	unsigned int clock_warps, clock_overflows, clock_underflows;
 | 
					 | 
				
			||||||
	u64 idle_clock;
 | 
					 | 
				
			||||||
	unsigned int clock_deep_idle_events;
 | 
					 | 
				
			||||||
	u64 tick_timestamp;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	atomic_t nr_iowait;
 | 
						atomic_t nr_iowait;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -628,82 +612,6 @@ static inline int cpu_of(struct rq *rq)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_NO_HZ
 | 
					 | 
				
			||||||
static inline bool nohz_on(int cpu)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	return tick_get_tick_sched(cpu)->nohz_mode != NOHZ_MODE_INACTIVE;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static inline u64 max_skipped_ticks(struct rq *rq)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	return nohz_on(cpu_of(rq)) ? jiffies - rq->last_tick_seen + 2 : 1;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static inline void update_last_tick_seen(struct rq *rq)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	rq->last_tick_seen = jiffies;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
static inline u64 max_skipped_ticks(struct rq *rq)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	return 1;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static inline void update_last_tick_seen(struct rq *rq)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
 * Update the per-runqueue clock, as finegrained as the platform can give
 | 
					 | 
				
			||||||
 * us, but without assuming monotonicity, etc.:
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
static void __update_rq_clock(struct rq *rq)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	u64 prev_raw = rq->prev_clock_raw;
 | 
					 | 
				
			||||||
	u64 now = sched_clock();
 | 
					 | 
				
			||||||
	s64 delta = now - prev_raw;
 | 
					 | 
				
			||||||
	u64 clock = rq->clock;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef CONFIG_SCHED_DEBUG
 | 
					 | 
				
			||||||
	WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * Protect against sched_clock() occasionally going backwards:
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	if (unlikely(delta < 0)) {
 | 
					 | 
				
			||||||
		clock++;
 | 
					 | 
				
			||||||
		rq->clock_warps++;
 | 
					 | 
				
			||||||
	} else {
 | 
					 | 
				
			||||||
		/*
 | 
					 | 
				
			||||||
		 * Catch too large forward jumps too:
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
		u64 max_jump = max_skipped_ticks(rq) * TICK_NSEC;
 | 
					 | 
				
			||||||
		u64 max_time = rq->tick_timestamp + max_jump;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		if (unlikely(clock + delta > max_time)) {
 | 
					 | 
				
			||||||
			if (clock < max_time)
 | 
					 | 
				
			||||||
				clock = max_time;
 | 
					 | 
				
			||||||
			else
 | 
					 | 
				
			||||||
				clock++;
 | 
					 | 
				
			||||||
			rq->clock_overflows++;
 | 
					 | 
				
			||||||
		} else {
 | 
					 | 
				
			||||||
			if (unlikely(delta > rq->clock_max_delta))
 | 
					 | 
				
			||||||
				rq->clock_max_delta = delta;
 | 
					 | 
				
			||||||
			clock += delta;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	rq->prev_clock_raw = now;
 | 
					 | 
				
			||||||
	rq->clock = clock;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void update_rq_clock(struct rq *rq)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	if (likely(smp_processor_id() == cpu_of(rq)))
 | 
					 | 
				
			||||||
		__update_rq_clock(rq);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
 | 
					 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
 | 
				
			||||||
 * See detach_destroy_domains: synchronize_sched for details.
 | 
					 * See detach_destroy_domains: synchronize_sched for details.
 | 
				
			||||||
| 
						 | 
					@ -719,6 +627,11 @@ static void update_rq_clock(struct rq *rq)
 | 
				
			||||||
#define task_rq(p)		cpu_rq(task_cpu(p))
 | 
					#define task_rq(p)		cpu_rq(task_cpu(p))
 | 
				
			||||||
#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 | 
					#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void update_rq_clock(struct rq *rq)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						rq->clock = sched_clock_cpu(cpu_of(rq));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
 | 
					 * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
| 
						 | 
					@ -935,7 +848,6 @@ static unsigned long long __sync_cpu_clock(unsigned long long time, int cpu)
 | 
				
			||||||
static unsigned long long __cpu_clock(int cpu)
 | 
					static unsigned long long __cpu_clock(int cpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long long now;
 | 
						unsigned long long now;
 | 
				
			||||||
	struct rq *rq;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Only call sched_clock() if the scheduler has already been
 | 
						 * Only call sched_clock() if the scheduler has already been
 | 
				
			||||||
| 
						 | 
					@ -944,9 +856,7 @@ static unsigned long long __cpu_clock(int cpu)
 | 
				
			||||||
	if (unlikely(!scheduler_running))
 | 
						if (unlikely(!scheduler_running))
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	rq = cpu_rq(cpu);
 | 
						now = sched_clock_cpu(cpu);
 | 
				
			||||||
	update_rq_clock(rq);
 | 
					 | 
				
			||||||
	now = rq->clock;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return now;
 | 
						return now;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -1120,45 +1030,6 @@ static struct rq *this_rq_lock(void)
 | 
				
			||||||
	return rq;
 | 
						return rq;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
 * We are going deep-idle (irqs are disabled):
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
void sched_clock_idle_sleep_event(void)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	struct rq *rq = cpu_rq(smp_processor_id());
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	WARN_ON(!irqs_disabled());
 | 
					 | 
				
			||||||
	spin_lock(&rq->lock);
 | 
					 | 
				
			||||||
	__update_rq_clock(rq);
 | 
					 | 
				
			||||||
	spin_unlock(&rq->lock);
 | 
					 | 
				
			||||||
	rq->clock_deep_idle_events++;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
 * We just idled delta nanoseconds (called with irqs disabled):
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
void sched_clock_idle_wakeup_event(u64 delta_ns)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	struct rq *rq = cpu_rq(smp_processor_id());
 | 
					 | 
				
			||||||
	u64 now = sched_clock();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	WARN_ON(!irqs_disabled());
 | 
					 | 
				
			||||||
	rq->idle_clock += delta_ns;
 | 
					 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * Override the previous timestamp and ignore all
 | 
					 | 
				
			||||||
	 * sched_clock() deltas that occured while we idled,
 | 
					 | 
				
			||||||
	 * and use the PM-provided delta_ns to advance the
 | 
					 | 
				
			||||||
	 * rq clock:
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	spin_lock(&rq->lock);
 | 
					 | 
				
			||||||
	rq->prev_clock_raw = now;
 | 
					 | 
				
			||||||
	rq->clock += delta_ns;
 | 
					 | 
				
			||||||
	spin_unlock(&rq->lock);
 | 
					 | 
				
			||||||
	touch_softlockup_watchdog();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void __resched_task(struct task_struct *p, int tif_bit);
 | 
					static void __resched_task(struct task_struct *p, int tif_bit);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void resched_task(struct task_struct *p)
 | 
					static inline void resched_task(struct task_struct *p)
 | 
				
			||||||
| 
						 | 
					@ -1283,7 +1154,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 | 
				
			||||||
	WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
 | 
						WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_lock(&rq->lock);
 | 
						spin_lock(&rq->lock);
 | 
				
			||||||
	__update_rq_clock(rq);
 | 
						update_rq_clock(rq);
 | 
				
			||||||
	rq->curr->sched_class->task_tick(rq, rq->curr, 1);
 | 
						rq->curr->sched_class->task_tick(rq, rq->curr, 1);
 | 
				
			||||||
	spin_unlock(&rq->lock);
 | 
						spin_unlock(&rq->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4476,19 +4347,11 @@ void scheduler_tick(void)
 | 
				
			||||||
	int cpu = smp_processor_id();
 | 
						int cpu = smp_processor_id();
 | 
				
			||||||
	struct rq *rq = cpu_rq(cpu);
 | 
						struct rq *rq = cpu_rq(cpu);
 | 
				
			||||||
	struct task_struct *curr = rq->curr;
 | 
						struct task_struct *curr = rq->curr;
 | 
				
			||||||
	u64 next_tick = rq->tick_timestamp + TICK_NSEC;
 | 
					
 | 
				
			||||||
 | 
						sched_clock_tick();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_lock(&rq->lock);
 | 
						spin_lock(&rq->lock);
 | 
				
			||||||
	__update_rq_clock(rq);
 | 
						update_rq_clock(rq);
 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * Let rq->clock advance by at least TICK_NSEC:
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	if (unlikely(rq->clock < next_tick)) {
 | 
					 | 
				
			||||||
		rq->clock = next_tick;
 | 
					 | 
				
			||||||
		rq->clock_underflows++;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	rq->tick_timestamp = rq->clock;
 | 
					 | 
				
			||||||
	update_last_tick_seen(rq);
 | 
					 | 
				
			||||||
	update_cpu_load(rq);
 | 
						update_cpu_load(rq);
 | 
				
			||||||
	curr->sched_class->task_tick(rq, curr, 0);
 | 
						curr->sched_class->task_tick(rq, curr, 0);
 | 
				
			||||||
	spin_unlock(&rq->lock);
 | 
						spin_unlock(&rq->lock);
 | 
				
			||||||
| 
						 | 
					@ -4642,7 +4505,7 @@ asmlinkage void __sched schedule(void)
 | 
				
			||||||
	 * Do the rq-clock update outside the rq lock:
 | 
						 * Do the rq-clock update outside the rq lock:
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	local_irq_disable();
 | 
						local_irq_disable();
 | 
				
			||||||
	__update_rq_clock(rq);
 | 
						update_rq_clock(rq);
 | 
				
			||||||
	spin_lock(&rq->lock);
 | 
						spin_lock(&rq->lock);
 | 
				
			||||||
	clear_tsk_need_resched(prev);
 | 
						clear_tsk_need_resched(prev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8226,8 +8089,6 @@ void __init sched_init(void)
 | 
				
			||||||
		spin_lock_init(&rq->lock);
 | 
							spin_lock_init(&rq->lock);
 | 
				
			||||||
		lockdep_set_class(&rq->lock, &rq->rq_lock_key);
 | 
							lockdep_set_class(&rq->lock, &rq->rq_lock_key);
 | 
				
			||||||
		rq->nr_running = 0;
 | 
							rq->nr_running = 0;
 | 
				
			||||||
		rq->clock = 1;
 | 
					 | 
				
			||||||
		update_last_tick_seen(rq);
 | 
					 | 
				
			||||||
		init_cfs_rq(&rq->cfs, rq);
 | 
							init_cfs_rq(&rq->cfs, rq);
 | 
				
			||||||
		init_rt_rq(&rq->rt, rq);
 | 
							init_rt_rq(&rq->rt, rq);
 | 
				
			||||||
#ifdef CONFIG_FAIR_GROUP_SCHED
 | 
					#ifdef CONFIG_FAIR_GROUP_SCHED
 | 
				
			||||||
| 
						 | 
					@ -8371,6 +8232,7 @@ EXPORT_SYMBOL(__might_sleep);
 | 
				
			||||||
static void normalize_task(struct rq *rq, struct task_struct *p)
 | 
					static void normalize_task(struct rq *rq, struct task_struct *p)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int on_rq;
 | 
						int on_rq;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	update_rq_clock(rq);
 | 
						update_rq_clock(rq);
 | 
				
			||||||
	on_rq = p->se.on_rq;
 | 
						on_rq = p->se.on_rq;
 | 
				
			||||||
	if (on_rq)
 | 
						if (on_rq)
 | 
				
			||||||
| 
						 | 
					@ -8402,7 +8264,6 @@ void normalize_rt_tasks(void)
 | 
				
			||||||
		p->se.sleep_start		= 0;
 | 
							p->se.sleep_start		= 0;
 | 
				
			||||||
		p->se.block_start		= 0;
 | 
							p->se.block_start		= 0;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
		task_rq(p)->clock		= 0;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (!rt_task(p)) {
 | 
							if (!rt_task(p)) {
 | 
				
			||||||
			/*
 | 
								/*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										236
									
								
								kernel/sched_clock.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										236
									
								
								kernel/sched_clock.c
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,236 @@
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * sched_clock for unstable cpu clocks
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *  Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Based on code by:
 | 
				
			||||||
 | 
					 *   Ingo Molnar <mingo@redhat.com>
 | 
				
			||||||
 | 
					 *   Guillaume Chazarain <guichaz@gmail.com>
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Create a semi stable clock from a mixture of other events, including:
 | 
				
			||||||
 | 
					 *  - gtod
 | 
				
			||||||
 | 
					 *  - jiffies
 | 
				
			||||||
 | 
					 *  - sched_clock()
 | 
				
			||||||
 | 
					 *  - explicit idle events
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * We use gtod as base and the unstable clock deltas. The deltas are filtered,
 | 
				
			||||||
 | 
					 * making it monotonic and keeping it within an expected window.  This window
 | 
				
			||||||
 | 
					 * is set up using jiffies.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Furthermore, explicit sleep and wakeup hooks allow us to account for time
 | 
				
			||||||
 | 
					 * that is otherwise invisible (TSC gets stopped).
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
 | 
				
			||||||
 | 
					 * consistent between cpus (never more than 1 jiffies difference).
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#include <linux/sched.h>
 | 
				
			||||||
 | 
					#include <linux/percpu.h>
 | 
				
			||||||
 | 
					#include <linux/spinlock.h>
 | 
				
			||||||
 | 
					#include <linux/ktime.h>
 | 
				
			||||||
 | 
					#include <linux/module.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct sched_clock_data {
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Raw spinlock - this is a special case: this might be called
 | 
				
			||||||
 | 
						 * from within instrumentation code so we dont want to do any
 | 
				
			||||||
 | 
						 * instrumentation ourselves.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						raw_spinlock_t		lock;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						unsigned long		prev_jiffies;
 | 
				
			||||||
 | 
						u64			prev_raw;
 | 
				
			||||||
 | 
						u64			tick_raw;
 | 
				
			||||||
 | 
						u64			tick_gtod;
 | 
				
			||||||
 | 
						u64			clock;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline struct sched_clock_data *this_scd(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return &__get_cpu_var(sched_clock_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline struct sched_clock_data *cpu_sdc(int cpu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return &per_cpu(sched_clock_data, cpu);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void sched_clock_init(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						u64 ktime_now = ktime_to_ns(ktime_get());
 | 
				
			||||||
 | 
						u64 now = 0;
 | 
				
			||||||
 | 
						int cpu;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for_each_possible_cpu(cpu) {
 | 
				
			||||||
 | 
							struct sched_clock_data *scd = cpu_sdc(cpu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 | 
				
			||||||
 | 
							scd->prev_jiffies = jiffies;
 | 
				
			||||||
 | 
							scd->prev_raw = now;
 | 
				
			||||||
 | 
							scd->tick_raw = now;
 | 
				
			||||||
 | 
							scd->tick_gtod = ktime_now;
 | 
				
			||||||
 | 
							scd->clock = ktime_now;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * update the percpu scd from the raw @now value
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *  - filter out backward motion
 | 
				
			||||||
 | 
					 *  - use jiffies to generate a min,max window to clip the raw values
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static void __update_sched_clock(struct sched_clock_data *scd, u64 now)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned long now_jiffies = jiffies;
 | 
				
			||||||
 | 
						long delta_jiffies = now_jiffies - scd->prev_jiffies;
 | 
				
			||||||
 | 
						u64 clock = scd->clock;
 | 
				
			||||||
 | 
						u64 min_clock, max_clock;
 | 
				
			||||||
 | 
						s64 delta = now - scd->prev_raw;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						WARN_ON_ONCE(!irqs_disabled());
 | 
				
			||||||
 | 
						min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (unlikely(delta < 0)) {
 | 
				
			||||||
 | 
							clock++;
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						max_clock = min_clock + TICK_NSEC;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (unlikely(clock + delta > max_clock)) {
 | 
				
			||||||
 | 
							if (clock < max_clock)
 | 
				
			||||||
 | 
								clock = max_clock;
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
 | 
								clock++;
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							clock += delta;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 out:
 | 
				
			||||||
 | 
						if (unlikely(clock < min_clock))
 | 
				
			||||||
 | 
							clock = min_clock;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						scd->prev_raw = now;
 | 
				
			||||||
 | 
						scd->prev_jiffies = now_jiffies;
 | 
				
			||||||
 | 
						scd->clock = clock;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void lock_double_clock(struct sched_clock_data *data1,
 | 
				
			||||||
 | 
									struct sched_clock_data *data2)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (data1 < data2) {
 | 
				
			||||||
 | 
							__raw_spin_lock(&data1->lock);
 | 
				
			||||||
 | 
							__raw_spin_lock(&data2->lock);
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							__raw_spin_lock(&data2->lock);
 | 
				
			||||||
 | 
							__raw_spin_lock(&data1->lock);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					u64 sched_clock_cpu(int cpu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct sched_clock_data *scd = cpu_sdc(cpu);
 | 
				
			||||||
 | 
						u64 now, clock;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						WARN_ON_ONCE(!irqs_disabled());
 | 
				
			||||||
 | 
						now = sched_clock();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (cpu != raw_smp_processor_id()) {
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * in order to update a remote cpu's clock based on our
 | 
				
			||||||
 | 
							 * unstable raw time rebase it against:
 | 
				
			||||||
 | 
							 *   tick_raw		(offset between raw counters)
 | 
				
			||||||
 | 
							 *   tick_gotd          (tick offset between cpus)
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							struct sched_clock_data *my_scd = this_scd();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							lock_double_clock(scd, my_scd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							now -= my_scd->tick_raw;
 | 
				
			||||||
 | 
							now += scd->tick_raw;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							now -= my_scd->tick_gtod;
 | 
				
			||||||
 | 
							now += scd->tick_gtod;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							__raw_spin_unlock(&my_scd->lock);
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							__raw_spin_lock(&scd->lock);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						__update_sched_clock(scd, now);
 | 
				
			||||||
 | 
						clock = scd->clock;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						__raw_spin_unlock(&scd->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return clock;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void sched_clock_tick(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct sched_clock_data *scd = this_scd();
 | 
				
			||||||
 | 
						u64 now, now_gtod;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						WARN_ON_ONCE(!irqs_disabled());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						now = sched_clock();
 | 
				
			||||||
 | 
						now_gtod = ktime_to_ns(ktime_get());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						__raw_spin_lock(&scd->lock);
 | 
				
			||||||
 | 
						__update_sched_clock(scd, now);
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * update tick_gtod after __update_sched_clock() because that will
 | 
				
			||||||
 | 
						 * already observe 1 new jiffy; adding a new tick_gtod to that would
 | 
				
			||||||
 | 
						 * increase the clock 2 jiffies.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						scd->tick_raw = now;
 | 
				
			||||||
 | 
						scd->tick_gtod = now_gtod;
 | 
				
			||||||
 | 
						__raw_spin_unlock(&scd->lock);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * We are going deep-idle (irqs are disabled):
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					void sched_clock_idle_sleep_event(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						sched_clock_cpu(smp_processor_id());
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * We just idled delta nanoseconds (called with irqs disabled):
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					void sched_clock_idle_wakeup_event(u64 delta_ns)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct sched_clock_data *scd = this_scd();
 | 
				
			||||||
 | 
						u64 now = sched_clock();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Override the previous timestamp and ignore all
 | 
				
			||||||
 | 
						 * sched_clock() deltas that occured while we idled,
 | 
				
			||||||
 | 
						 * and use the PM-provided delta_ns to advance the
 | 
				
			||||||
 | 
						 * rq clock:
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						__raw_spin_lock(&scd->lock);
 | 
				
			||||||
 | 
						scd->prev_raw = now;
 | 
				
			||||||
 | 
						scd->clock += delta_ns;
 | 
				
			||||||
 | 
						__raw_spin_unlock(&scd->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						touch_softlockup_watchdog();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Scheduler clock - returns current time in nanosec units.
 | 
				
			||||||
 | 
					 * This is default implementation.
 | 
				
			||||||
 | 
					 * Architectures and sub-architectures can override this.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					unsigned long long __attribute__((weak)) sched_clock(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -204,13 +204,6 @@ static void print_cpu(struct seq_file *m, int cpu)
 | 
				
			||||||
	PN(next_balance);
 | 
						PN(next_balance);
 | 
				
			||||||
	P(curr->pid);
 | 
						P(curr->pid);
 | 
				
			||||||
	PN(clock);
 | 
						PN(clock);
 | 
				
			||||||
	PN(idle_clock);
 | 
					 | 
				
			||||||
	PN(prev_clock_raw);
 | 
					 | 
				
			||||||
	P(clock_warps);
 | 
					 | 
				
			||||||
	P(clock_overflows);
 | 
					 | 
				
			||||||
	P(clock_underflows);
 | 
					 | 
				
			||||||
	P(clock_deep_idle_events);
 | 
					 | 
				
			||||||
	PN(clock_max_delta);
 | 
					 | 
				
			||||||
	P(cpu_load[0]);
 | 
						P(cpu_load[0]);
 | 
				
			||||||
	P(cpu_load[1]);
 | 
						P(cpu_load[1]);
 | 
				
			||||||
	P(cpu_load[2]);
 | 
						P(cpu_load[2]);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -959,7 +959,7 @@ static void yield_task_fair(struct rq *rq)
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
 | 
						if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
 | 
				
			||||||
		__update_rq_clock(rq);
 | 
							update_rq_clock(rq);
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * Update run-time statistics of the 'current'.
 | 
							 * Update run-time statistics of the 'current'.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue