mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	* 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: rcu: fix rcutorture bug rcu: eliminate synchronize_rcu_xxx macro rcu: make treercu safe for suspend and resume rcu: fix rcutree grace-period-latency bug on small systems futex: catch certain assymetric (get|put)_futex_key calls futex: make futex_(get|put)_key() calls symmetric locking, percpu counters: introduce separate lock classes swiotlb: clean up EXPORT_SYMBOL usage swiotlb: remove unnecessary declaration swiotlb: replace architecture-specific swiotlb.h with linux/swiotlb.h swiotlb: add support for systems with highmem swiotlb: store phys address in io_tlb_orig_addr array swiotlb: add hwdev to swiotlb_phys_to_bus() / swiotlb_sg_to_bus()
		
			
				
	
	
		
			407 lines
		
	
	
	
		
			9.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			407 lines
		
	
	
	
		
			9.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * Floating proportions
 | 
						|
 *
 | 
						|
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 | 
						|
 *
 | 
						|
 * Description:
 | 
						|
 *
 | 
						|
 * The floating proportion is a time derivative with an exponentially decaying
 | 
						|
 * history:
 | 
						|
 *
 | 
						|
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 | 
						|
 *
 | 
						|
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 | 
						|
 * and i the time period over which the differential is taken. So d/dt_{-i} is
 | 
						|
 * the differential over the i-th last period.
 | 
						|
 *
 | 
						|
 * The decaying history gives smooth transitions. The time differential carries
 | 
						|
 * the notion of speed.
 | 
						|
 *
 | 
						|
 * The denominator is 2^(1+i) because we want the series to be normalised, ie.
 | 
						|
 *
 | 
						|
 *   \Sum_{i=0} 1/2^(1+i) = 1
 | 
						|
 *
 | 
						|
 * Furthermore, if we measure time (t) in the same events as x; so that:
 | 
						|
 *
 | 
						|
 *   t = \Sum_{j} x_{j}
 | 
						|
 *
 | 
						|
 * we get that:
 | 
						|
 *
 | 
						|
 *   \Sum_{j} p_{j} = 1
 | 
						|
 *
 | 
						|
 * Writing this in an iterative fashion we get (dropping the 'd's):
 | 
						|
 *
 | 
						|
 *   if (++x_{j}, ++t > period)
 | 
						|
 *     t /= 2;
 | 
						|
 *     for_each (j)
 | 
						|
 *       x_{j} /= 2;
 | 
						|
 *
 | 
						|
 * so that:
 | 
						|
 *
 | 
						|
 *   p_{j} = x_{j} / t;
 | 
						|
 *
 | 
						|
 * We optimize away the '/= 2' for the global time delta by noting that:
 | 
						|
 *
 | 
						|
 *   if (++t > period) t /= 2;
 | 
						|
 *
 | 
						|
 * Can be approximated by:
 | 
						|
 *
 | 
						|
 *   period/2 + (++t % (period/2))
 | 
						|
 *
 | 
						|
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 | 
						|
 *   binary operations and wraparound artefacts disappear. ]
 | 
						|
 *
 | 
						|
 * Also note that this yields a natural counter of the elapsed periods:
 | 
						|
 *
 | 
						|
 *   c = t / (period/2)
 | 
						|
 *
 | 
						|
 * [ Its monotonic increasing property can be applied to mitigate the wrap-
 | 
						|
 *   around issue. ]
 | 
						|
 *
 | 
						|
 * This allows us to do away with the loop over all prop_locals on each period
 | 
						|
 * expiration. By remembering the period count under which it was last accessed
 | 
						|
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 | 
						|
 *
 | 
						|
 *   c - c_{j}
 | 
						|
 *
 | 
						|
 * We can then lazily catch up to the global period count every time we are
 | 
						|
 * going to use x_{j}, by doing:
 | 
						|
 *
 | 
						|
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 | 
						|
 */
 | 
						|
 | 
						|
#include <linux/proportions.h>
 | 
						|
#include <linux/rcupdate.h>
 | 
						|
 | 
						|
int prop_descriptor_init(struct prop_descriptor *pd, int shift)
 | 
						|
{
 | 
						|
	int err;
 | 
						|
 | 
						|
	if (shift > PROP_MAX_SHIFT)
 | 
						|
		shift = PROP_MAX_SHIFT;
 | 
						|
 | 
						|
	pd->index = 0;
 | 
						|
	pd->pg[0].shift = shift;
 | 
						|
	mutex_init(&pd->mutex);
 | 
						|
	err = percpu_counter_init(&pd->pg[0].events, 0);
 | 
						|
	if (err)
 | 
						|
		goto out;
 | 
						|
 | 
						|
	err = percpu_counter_init(&pd->pg[1].events, 0);
 | 
						|
	if (err)
 | 
						|
		percpu_counter_destroy(&pd->pg[0].events);
 | 
						|
 | 
						|
out:
 | 
						|
	return err;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * We have two copies, and flip between them to make it seem like an atomic
 | 
						|
 * update. The update is not really atomic wrt the events counter, but
 | 
						|
 * it is internally consistent with the bit layout depending on shift.
 | 
						|
 *
 | 
						|
 * We copy the events count, move the bits around and flip the index.
 | 
						|
 */
 | 
						|
void prop_change_shift(struct prop_descriptor *pd, int shift)
 | 
						|
{
 | 
						|
	int index;
 | 
						|
	int offset;
 | 
						|
	u64 events;
 | 
						|
	unsigned long flags;
 | 
						|
 | 
						|
	if (shift > PROP_MAX_SHIFT)
 | 
						|
		shift = PROP_MAX_SHIFT;
 | 
						|
 | 
						|
	mutex_lock(&pd->mutex);
 | 
						|
 | 
						|
	index = pd->index ^ 1;
 | 
						|
	offset = pd->pg[pd->index].shift - shift;
 | 
						|
	if (!offset)
 | 
						|
		goto out;
 | 
						|
 | 
						|
	pd->pg[index].shift = shift;
 | 
						|
 | 
						|
	local_irq_save(flags);
 | 
						|
	events = percpu_counter_sum(&pd->pg[pd->index].events);
 | 
						|
	if (offset < 0)
 | 
						|
		events <<= -offset;
 | 
						|
	else
 | 
						|
		events >>= offset;
 | 
						|
	percpu_counter_set(&pd->pg[index].events, events);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * ensure the new pg is fully written before the switch
 | 
						|
	 */
 | 
						|
	smp_wmb();
 | 
						|
	pd->index = index;
 | 
						|
	local_irq_restore(flags);
 | 
						|
 | 
						|
	synchronize_rcu();
 | 
						|
 | 
						|
out:
 | 
						|
	mutex_unlock(&pd->mutex);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * wrap the access to the data in an rcu_read_lock() section;
 | 
						|
 * this is used to track the active references.
 | 
						|
 */
 | 
						|
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
 | 
						|
__acquires(RCU)
 | 
						|
{
 | 
						|
	int index;
 | 
						|
 | 
						|
	rcu_read_lock();
 | 
						|
	index = pd->index;
 | 
						|
	/*
 | 
						|
	 * match the wmb from vcd_flip()
 | 
						|
	 */
 | 
						|
	smp_rmb();
 | 
						|
	return &pd->pg[index];
 | 
						|
}
 | 
						|
 | 
						|
static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
 | 
						|
__releases(RCU)
 | 
						|
{
 | 
						|
	rcu_read_unlock();
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Bring a local's period value in line with a new shift.
 *
 * When *pl_shift differs from @new_shift, *pl_period is shifted left
 * (shift grew) or right (shift shrank) by the difference and *pl_shift is
 * updated. When they are equal nothing is touched.
 */
static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
	int delta = new_shift - *pl_shift;

	if (delta == 0)
		return;

	if (delta > 0)
		*pl_period <<= delta;
	else
		*pl_period >>= -delta;

	*pl_shift = new_shift;
}
 | 
						|
 | 
						|
/*
 | 
						|
 * PERCPU
 | 
						|
 */
 | 
						|
 | 
						|
/*
 * Batch size passed to __percpu_counter_add() for a local's event counter
 * by __prop_inc_percpu() and prop_norm_percpu(). prop_norm_percpu() also
 * uses nr_cpu_ids * PROP_BATCH as the threshold below which it switches
 * from percpu_counter_read() to percpu_counter_sum().
 */
#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
 | 
						|
 | 
						|
int prop_local_init_percpu(struct prop_local_percpu *pl)
 | 
						|
{
 | 
						|
	spin_lock_init(&pl->lock);
 | 
						|
	pl->shift = 0;
 | 
						|
	pl->period = 0;
 | 
						|
	return percpu_counter_init(&pl->events, 0);
 | 
						|
}
 | 
						|
 | 
						|
void prop_local_destroy_percpu(struct prop_local_percpu *pl)
 | 
						|
{
 | 
						|
	percpu_counter_destroy(&pl->events);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Catch up with missed period expirations.
 | 
						|
 *
 | 
						|
 *   until (c_{j} == c)
 | 
						|
 *     x_{j} -= x_{j}/2;
 | 
						|
 *     c_{j}++;
 | 
						|
 */
 | 
						|
static
 | 
						|
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
 | 
						|
{
 | 
						|
	unsigned long period = 1UL << (pg->shift - 1);
 | 
						|
	unsigned long period_mask = ~(period - 1);
 | 
						|
	unsigned long global_period;
 | 
						|
	unsigned long flags;
 | 
						|
 | 
						|
	global_period = percpu_counter_read(&pg->events);
 | 
						|
	global_period &= period_mask;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Fast path - check if the local and global period count still match
 | 
						|
	 * outside of the lock.
 | 
						|
	 */
 | 
						|
	if (pl->period == global_period)
 | 
						|
		return;
 | 
						|
 | 
						|
	spin_lock_irqsave(&pl->lock, flags);
 | 
						|
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * For each missed period, we half the local counter.
 | 
						|
	 * basically:
 | 
						|
	 *   pl->events >> (global_period - pl->period);
 | 
						|
	 */
 | 
						|
	period = (global_period - pl->period) >> (pg->shift - 1);
 | 
						|
	if (period < BITS_PER_LONG) {
 | 
						|
		s64 val = percpu_counter_read(&pl->events);
 | 
						|
 | 
						|
		if (val < (nr_cpu_ids * PROP_BATCH))
 | 
						|
			val = percpu_counter_sum(&pl->events);
 | 
						|
 | 
						|
		__percpu_counter_add(&pl->events, -val + (val >> period),
 | 
						|
					PROP_BATCH);
 | 
						|
	} else
 | 
						|
		percpu_counter_set(&pl->events, 0);
 | 
						|
 | 
						|
	pl->period = global_period;
 | 
						|
	spin_unlock_irqrestore(&pl->lock, flags);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 *   ++x_{j}, ++t
 | 
						|
 */
 | 
						|
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
 | 
						|
{
 | 
						|
	struct prop_global *pg = prop_get_global(pd);
 | 
						|
 | 
						|
	prop_norm_percpu(pg, pl);
 | 
						|
	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
 | 
						|
	percpu_counter_add(&pg->events, 1);
 | 
						|
	prop_put_global(pd, pg);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * identical to __prop_inc_percpu, except that it limits this pl's fraction to
 | 
						|
 * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
 | 
						|
 */
 | 
						|
void __prop_inc_percpu_max(struct prop_descriptor *pd,
 | 
						|
			   struct prop_local_percpu *pl, long frac)
 | 
						|
{
 | 
						|
	struct prop_global *pg = prop_get_global(pd);
 | 
						|
 | 
						|
	prop_norm_percpu(pg, pl);
 | 
						|
 | 
						|
	if (unlikely(frac != PROP_FRAC_BASE)) {
 | 
						|
		unsigned long period_2 = 1UL << (pg->shift - 1);
 | 
						|
		unsigned long counter_mask = period_2 - 1;
 | 
						|
		unsigned long global_count;
 | 
						|
		long numerator, denominator;
 | 
						|
 | 
						|
		numerator = percpu_counter_read_positive(&pl->events);
 | 
						|
		global_count = percpu_counter_read(&pg->events);
 | 
						|
		denominator = period_2 + (global_count & counter_mask);
 | 
						|
 | 
						|
		if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
 | 
						|
			goto out_put;
 | 
						|
	}
 | 
						|
 | 
						|
	percpu_counter_add(&pl->events, 1);
 | 
						|
	percpu_counter_add(&pg->events, 1);
 | 
						|
 | 
						|
out_put:
 | 
						|
	prop_put_global(pd, pg);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Obtain a fraction of this proportion
 | 
						|
 *
 | 
						|
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 | 
						|
 */
 | 
						|
void prop_fraction_percpu(struct prop_descriptor *pd,
 | 
						|
		struct prop_local_percpu *pl,
 | 
						|
		long *numerator, long *denominator)
 | 
						|
{
 | 
						|
	struct prop_global *pg = prop_get_global(pd);
 | 
						|
	unsigned long period_2 = 1UL << (pg->shift - 1);
 | 
						|
	unsigned long counter_mask = period_2 - 1;
 | 
						|
	unsigned long global_count;
 | 
						|
 | 
						|
	prop_norm_percpu(pg, pl);
 | 
						|
	*numerator = percpu_counter_read_positive(&pl->events);
 | 
						|
 | 
						|
	global_count = percpu_counter_read(&pg->events);
 | 
						|
	*denominator = period_2 + (global_count & counter_mask);
 | 
						|
 | 
						|
	prop_put_global(pd, pg);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * SINGLE
 | 
						|
 */
 | 
						|
 | 
						|
int prop_local_init_single(struct prop_local_single *pl)
 | 
						|
{
 | 
						|
	spin_lock_init(&pl->lock);
 | 
						|
	pl->shift = 0;
 | 
						|
	pl->period = 0;
 | 
						|
	pl->events = 0;
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Counterpart of prop_local_init_single(); there is nothing to release.
 */
void prop_local_destroy_single(struct prop_local_single *pl)
{
	/* intentionally empty */
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Catch up with missed period expirations.
 | 
						|
 */
 | 
						|
static
 | 
						|
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
 | 
						|
{
 | 
						|
	unsigned long period = 1UL << (pg->shift - 1);
 | 
						|
	unsigned long period_mask = ~(period - 1);
 | 
						|
	unsigned long global_period;
 | 
						|
	unsigned long flags;
 | 
						|
 | 
						|
	global_period = percpu_counter_read(&pg->events);
 | 
						|
	global_period &= period_mask;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Fast path - check if the local and global period count still match
 | 
						|
	 * outside of the lock.
 | 
						|
	 */
 | 
						|
	if (pl->period == global_period)
 | 
						|
		return;
 | 
						|
 | 
						|
	spin_lock_irqsave(&pl->lock, flags);
 | 
						|
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
 | 
						|
	/*
 | 
						|
	 * For each missed period, we half the local counter.
 | 
						|
	 */
 | 
						|
	period = (global_period - pl->period) >> (pg->shift - 1);
 | 
						|
	if (likely(period < BITS_PER_LONG))
 | 
						|
		pl->events >>= period;
 | 
						|
	else
 | 
						|
		pl->events = 0;
 | 
						|
	pl->period = global_period;
 | 
						|
	spin_unlock_irqrestore(&pl->lock, flags);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 *   ++x_{j}, ++t
 | 
						|
 */
 | 
						|
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
 | 
						|
{
 | 
						|
	struct prop_global *pg = prop_get_global(pd);
 | 
						|
 | 
						|
	prop_norm_single(pg, pl);
 | 
						|
	pl->events++;
 | 
						|
	percpu_counter_add(&pg->events, 1);
 | 
						|
	prop_put_global(pd, pg);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Obtain a fraction of this proportion
 | 
						|
 *
 | 
						|
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 | 
						|
 */
 | 
						|
void prop_fraction_single(struct prop_descriptor *pd,
 | 
						|
	       	struct prop_local_single *pl,
 | 
						|
		long *numerator, long *denominator)
 | 
						|
{
 | 
						|
	struct prop_global *pg = prop_get_global(pd);
 | 
						|
	unsigned long period_2 = 1UL << (pg->shift - 1);
 | 
						|
	unsigned long counter_mask = period_2 - 1;
 | 
						|
	unsigned long global_count;
 | 
						|
 | 
						|
	prop_norm_single(pg, pl);
 | 
						|
	*numerator = pl->events;
 | 
						|
 | 
						|
	global_count = percpu_counter_read(&pg->events);
 | 
						|
	*denominator = period_2 + (global_count & counter_mask);
 | 
						|
 | 
						|
	prop_put_global(pd, pg);
 | 
						|
}
 |