mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	x86: Add performance variants of cpumask operators
* Increase performance for systems with a large NR_CPUS count by limiting
    the range of the cpumask operators that loop over the bits in a cpumask_t
    variable.  This removes a large number of wasted CPU cycles.
  * Add performance variants of the cpumask operators:
    int cpus_weight_nr(mask)	     Hamming weight using nr_cpu_ids instead of NR_CPUS
    int first_cpu_nr(mask)	     Number lowest set bit, or nr_cpu_ids
    int next_cpu_nr(cpu, mask)	     Next cpu past 'cpu', or nr_cpu_ids
    for_each_cpu_mask_nr(cpu, mask)  for-loop cpu over mask using nr_cpu_ids
  * Modify the following to use the performance variants:
    #define num_online_cpus()	cpus_weight_nr(cpu_online_map)
    #define num_possible_cpus()	cpus_weight_nr(cpu_possible_map)
    #define num_present_cpus()	cpus_weight_nr(cpu_present_map)
    #define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), ...)
    #define for_each_online_cpu(cpu)   for_each_cpu_mask_nr((cpu), ...)
    #define for_each_present_cpu(cpu)  for_each_cpu_mask_nr((cpu), ...)
  * Comment added to include/linux/cpumask.h:
    Note: The alternate operations with the suffix "_nr" are used
	  to limit the range of the loop to nr_cpu_ids instead of
	  NR_CPUS when NR_CPUS > 64 for performance reasons.
	  If NR_CPUS is <= 64 then most assembler bitmask
	  operators execute faster with a constant range, so
	  the operator will continue to use NR_CPUS.
	  Another consideration is that nr_cpu_ids is initialized
	  to NR_CPUS and isn't lowered until the possible cpus are
	  discovered (including any disabled cpus).  So early uses
	  will span the entire range of NR_CPUS.
    (The net effect is that for kernels configured with NR_CPUS <= 64 there
     are no functional changes.)
For inclusion into sched-devel/latest tree.
Based on:
	git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
    +   sched-devel/latest  .../mingo/linux-2.6-sched-devel.git
Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Reviewed-by: Paul Jackson <pj@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
			
			
This commit is contained in:
		
							parent
							
								
									143aa5c53b
								
							
						
					
					
						commit
						41df0d61c2
					
				
					 2 changed files with 71 additions and 30 deletions
				
			
		| 
						 | 
					@ -17,6 +17,20 @@
 | 
				
			||||||
 * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c.
 | 
					 * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c.
 | 
				
			||||||
 * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c.
 | 
					 * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 | 
					 * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 | 
				
			||||||
 | 
					 * Note: The alternate operations with the suffix "_nr" are used
 | 
				
			||||||
 | 
					 *       to limit the range of the loop to nr_cpu_ids instead of
 | 
				
			||||||
 | 
					 *       NR_CPUS when NR_CPUS > 64 for performance reasons.
 | 
				
			||||||
 | 
					 *       If NR_CPUS is <= 64 then most assembler bitmask
 | 
				
			||||||
 | 
					 *       operators execute faster with a constant range, so
 | 
				
			||||||
 | 
					 *       the operator will continue to use NR_CPUS.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *       Another consideration is that nr_cpu_ids is initialized
 | 
				
			||||||
 | 
					 *       to NR_CPUS and isn't lowered until the possible cpus are
 | 
				
			||||||
 | 
					 *       discovered (including any disabled cpus).  So early uses
 | 
				
			||||||
 | 
					 *       will span the entire range of NR_CPUS.
 | 
				
			||||||
 | 
					 * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 * The available cpumask operations are:
 | 
					 * The available cpumask operations are:
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * void cpu_set(cpu, mask)		turn on bit 'cpu' in mask
 | 
					 * void cpu_set(cpu, mask)		turn on bit 'cpu' in mask
 | 
				
			||||||
| 
						 | 
					@ -38,12 +52,14 @@
 | 
				
			||||||
 * int cpus_empty(mask)			Is mask empty (no bits sets)?
 | 
					 * int cpus_empty(mask)			Is mask empty (no bits sets)?
 | 
				
			||||||
 * int cpus_full(mask)			Is mask full (all bits sets)?
 | 
					 * int cpus_full(mask)			Is mask full (all bits sets)?
 | 
				
			||||||
 * int cpus_weight(mask)		Hamming weigh - number of set bits
 | 
					 * int cpus_weight(mask)		Hamming weigh - number of set bits
 | 
				
			||||||
 | 
					 * int cpus_weight_nr(mask)		Same using nr_cpu_ids instead of NR_CPUS
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * void cpus_shift_right(dst, src, n)	Shift right
 | 
					 * void cpus_shift_right(dst, src, n)	Shift right
 | 
				
			||||||
 * void cpus_shift_left(dst, src, n)	Shift left
 | 
					 * void cpus_shift_left(dst, src, n)	Shift left
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * int first_cpu(mask)			Number lowest set bit, or NR_CPUS
 | 
					 * int first_cpu(mask)			Number lowest set bit, or NR_CPUS
 | 
				
			||||||
 * int next_cpu(cpu, mask)		Next cpu past 'cpu', or NR_CPUS
 | 
					 * int next_cpu(cpu, mask)		Next cpu past 'cpu', or NR_CPUS
 | 
				
			||||||
 | 
					 * int next_cpu_nr(cpu, mask)		Next cpu past 'cpu', or nr_cpu_ids
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * cpumask_t cpumask_of_cpu(cpu)	Return cpumask with bit 'cpu' set
 | 
					 * cpumask_t cpumask_of_cpu(cpu)	Return cpumask with bit 'cpu' set
 | 
				
			||||||
 * CPU_MASK_ALL				Initializer - all bits set
 | 
					 * CPU_MASK_ALL				Initializer - all bits set
 | 
				
			||||||
| 
						 | 
					@ -59,7 +75,8 @@
 | 
				
			||||||
 * void cpus_onto(dst, orig, relmap)	*dst = orig relative to relmap
 | 
					 * void cpus_onto(dst, orig, relmap)	*dst = orig relative to relmap
 | 
				
			||||||
 * void cpus_fold(dst, orig, sz)	dst bits = orig bits mod sz
 | 
					 * void cpus_fold(dst, orig, sz)	dst bits = orig bits mod sz
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * for_each_cpu_mask(cpu, mask)		for-loop cpu over mask
 | 
					 * for_each_cpu_mask(cpu, mask)		for-loop cpu over mask using NR_CPUS
 | 
				
			||||||
 | 
					 * for_each_cpu_mask_nr(cpu, mask)	for-loop cpu over mask using nr_cpu_ids
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * int num_online_cpus()		Number of online CPUs
 | 
					 * int num_online_cpus()		Number of online CPUs
 | 
				
			||||||
 * int num_possible_cpus()		Number of all possible CPUs
 | 
					 * int num_possible_cpus()		Number of all possible CPUs
 | 
				
			||||||
| 
						 | 
					@ -216,15 +233,6 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
 | 
				
			||||||
	bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
 | 
						bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_SMP
 | 
					 | 
				
			||||||
int __first_cpu(const cpumask_t *srcp);
 | 
					 | 
				
			||||||
#define first_cpu(src) __first_cpu(&(src))
 | 
					 | 
				
			||||||
int __next_cpu(int n, const cpumask_t *srcp);
 | 
					 | 
				
			||||||
#define next_cpu(n, src) __next_cpu((n), &(src))
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
#define first_cpu(src)		({ (void)(src); 0; })
 | 
					 | 
				
			||||||
#define next_cpu(n, src)	({ (void)(src); 1; })
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
 | 
					#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
 | 
				
			||||||
extern cpumask_t *cpumask_of_cpu_map;
 | 
					extern cpumask_t *cpumask_of_cpu_map;
 | 
				
			||||||
| 
						 | 
					@ -343,15 +351,48 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp,
 | 
				
			||||||
	bitmap_fold(dstp->bits, origp->bits, sz, nbits);
 | 
						bitmap_fold(dstp->bits, origp->bits, sz, nbits);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if NR_CPUS > 1
 | 
					#if NR_CPUS == 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define nr_cpu_ids		1
 | 
				
			||||||
 | 
					#define first_cpu(src)		({ (void)(src); 0; })
 | 
				
			||||||
 | 
					#define next_cpu(n, src)	({ (void)(src); 1; })
 | 
				
			||||||
 | 
					#define any_online_cpu(mask)	0
 | 
				
			||||||
 | 
					#define for_each_cpu_mask(cpu, mask)	\
 | 
				
			||||||
 | 
						for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#else /* NR_CPUS > 1 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					extern int nr_cpu_ids;
 | 
				
			||||||
 | 
					int __first_cpu(const cpumask_t *srcp);
 | 
				
			||||||
 | 
					int __next_cpu(int n, const cpumask_t *srcp);
 | 
				
			||||||
 | 
					int __any_online_cpu(const cpumask_t *mask);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define first_cpu(src)		__first_cpu(&(src))
 | 
				
			||||||
 | 
					#define next_cpu(n, src)	__next_cpu((n), &(src))
 | 
				
			||||||
 | 
					#define any_online_cpu(mask) __any_online_cpu(&(mask))
 | 
				
			||||||
#define for_each_cpu_mask(cpu, mask)		\
 | 
					#define for_each_cpu_mask(cpu, mask)		\
 | 
				
			||||||
	for ((cpu) = first_cpu(mask);		\
 | 
						for ((cpu) = first_cpu(mask);		\
 | 
				
			||||||
		(cpu) < NR_CPUS;		\
 | 
							(cpu) < NR_CPUS;		\
 | 
				
			||||||
		(cpu) = next_cpu((cpu), (mask)))
 | 
							(cpu) = next_cpu((cpu), (mask)))
 | 
				
			||||||
#else /* NR_CPUS == 1 */
 | 
					#endif
 | 
				
			||||||
#define for_each_cpu_mask(cpu, mask)		\
 | 
					
 | 
				
			||||||
	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 | 
					#if NR_CPUS <= 64
 | 
				
			||||||
#endif /* NR_CPUS */
 | 
					
 | 
				
			||||||
 | 
					#define next_cpu_nr(n, src)		next_cpu(n, src)
 | 
				
			||||||
 | 
					#define cpus_weight_nr(cpumask)		cpus_weight(cpumask)
 | 
				
			||||||
 | 
					#define for_each_cpu_mask_nr(cpu, mask)	for_each_cpu_mask(cpu, mask)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#else /* NR_CPUS > 64 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int __next_cpu_nr(int n, const cpumask_t *srcp);
 | 
				
			||||||
 | 
					#define next_cpu_nr(n, src)	__next_cpu_nr((n), &(src))
 | 
				
			||||||
 | 
					#define cpus_weight_nr(cpumask)	__cpus_weight(&(cpumask), nr_cpu_ids)
 | 
				
			||||||
 | 
					#define for_each_cpu_mask_nr(cpu, mask)		\
 | 
				
			||||||
 | 
						for ((cpu) = first_cpu(mask);		\
 | 
				
			||||||
 | 
							(cpu) < nr_cpu_ids;		\
 | 
				
			||||||
 | 
							(cpu) = next_cpu_nr((cpu), (mask)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif /* NR_CPUS > 64 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * The following particular system cpumasks and operations manage
 | 
					 * The following particular system cpumasks and operations manage
 | 
				
			||||||
| 
						 | 
					@ -414,9 +455,9 @@ extern cpumask_t cpu_online_map;
 | 
				
			||||||
extern cpumask_t cpu_present_map;
 | 
					extern cpumask_t cpu_present_map;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if NR_CPUS > 1
 | 
					#if NR_CPUS > 1
 | 
				
			||||||
#define num_online_cpus()	cpus_weight(cpu_online_map)
 | 
					#define num_online_cpus()	cpus_weight_nr(cpu_online_map)
 | 
				
			||||||
#define num_possible_cpus()	cpus_weight(cpu_possible_map)
 | 
					#define num_possible_cpus()	cpus_weight_nr(cpu_possible_map)
 | 
				
			||||||
#define num_present_cpus()	cpus_weight(cpu_present_map)
 | 
					#define num_present_cpus()	cpus_weight_nr(cpu_present_map)
 | 
				
			||||||
#define cpu_online(cpu)		cpu_isset((cpu), cpu_online_map)
 | 
					#define cpu_online(cpu)		cpu_isset((cpu), cpu_online_map)
 | 
				
			||||||
#define cpu_possible(cpu)	cpu_isset((cpu), cpu_possible_map)
 | 
					#define cpu_possible(cpu)	cpu_isset((cpu), cpu_possible_map)
 | 
				
			||||||
#define cpu_present(cpu)	cpu_isset((cpu), cpu_present_map)
 | 
					#define cpu_present(cpu)	cpu_isset((cpu), cpu_present_map)
 | 
				
			||||||
| 
						 | 
					@ -431,17 +472,8 @@ extern cpumask_t cpu_present_map;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define cpu_is_offline(cpu)	unlikely(!cpu_online(cpu))
 | 
					#define cpu_is_offline(cpu)	unlikely(!cpu_online(cpu))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_SMP
 | 
					#define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_possible_map)
 | 
				
			||||||
extern int nr_cpu_ids;
 | 
					#define for_each_online_cpu(cpu)   for_each_cpu_mask_nr((cpu), cpu_online_map)
 | 
				
			||||||
#define any_online_cpu(mask) __any_online_cpu(&(mask))
 | 
					#define for_each_present_cpu(cpu)  for_each_cpu_mask_nr((cpu), cpu_present_map)
 | 
				
			||||||
int __any_online_cpu(const cpumask_t *mask);
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
#define nr_cpu_ids			1
 | 
					 | 
				
			||||||
#define any_online_cpu(mask)		0
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define for_each_possible_cpu(cpu)  for_each_cpu_mask((cpu), cpu_possible_map)
 | 
					 | 
				
			||||||
#define for_each_online_cpu(cpu)  for_each_cpu_mask((cpu), cpu_online_map)
 | 
					 | 
				
			||||||
#define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif /* __LINUX_CPUMASK_H */
 | 
					#endif /* __LINUX_CPUMASK_H */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -15,6 +15,15 @@ int __next_cpu(int n, const cpumask_t *srcp)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL(__next_cpu);
 | 
					EXPORT_SYMBOL(__next_cpu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if NR_CPUS > 64
 | 
				
			||||||
 | 
					int __next_cpu_nr(int n, const cpumask_t *srcp)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return min_t(int, nr_cpu_ids,
 | 
				
			||||||
 | 
									find_next_bit(srcp->bits, nr_cpu_ids, n+1));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(__next_cpu_nr);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int __any_online_cpu(const cpumask_t *mask)
 | 
					int __any_online_cpu(const cpumask_t *mask)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int cpu;
 | 
						int cpu;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue