mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	Add support for the hardware version of the Hamming weight function, popcnt, present in CPUs which advertise it under CPUID, Function 0x0000_0001_ECX[23]. On CPUs which don't support it, we fall back to the default lib/hweight.c sw versions. A synthetic benchmark comparing popcnt with __sw_hweight64 showed almost a 3x speedup on an F10h machine. Signed-off-by: Borislav Petkov <borislav.petkov@amd.com> LKML-Reference: <20100318112015.GC11152@aftab> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
		
			
				
	
	
		
			67 lines
		
	
	
	
		
			1.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			67 lines
		
	
	
	
		
			1.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
#include <linux/module.h>
 | 
						|
#include <linux/bitops.h>
 | 
						|
#include <asm/types.h>
 | 
						|
 | 
						|
/**
 * hweightN - returns the hamming weight of a N-bit word
 * @x: the word to weigh
 *
 * The Hamming Weight of a number is the total number of bits set in it.
 *
 * All variants below use the same divide-and-conquer reduction: the word
 * is first turned into 2-bit fields each holding the bit count of that
 * pair, then into 4-bit fields, then into per-byte counts, and finally
 * the per-byte counts are summed.
 */

/*
 * __sw_hweight32 - software population count of a 32-bit word
 * @w: the word to weigh
 *
 * Returns the number of set bits in @w (0..32).
 */
unsigned int __sw_hweight32(unsigned int w)
{
#ifdef ARCH_HAS_FAST_MULTIPLIER
	/* Each 2-bit field becomes the count of bits set in that pair. */
	w -= (w >> 1) & 0x55555555;
	/* Add neighbouring pairs: each nibble now holds a count 0..4. */
	w =  (w & 0x33333333) + ((w >> 2) & 0x33333333);
	/* Add neighbouring nibbles: each byte now holds a count 0..8. */
	w =  (w + (w >> 4)) & 0x0f0f0f0f;
	/*
	 * Multiplying by 0x01010101 accumulates the sum of all four bytes
	 * in the top byte; the shift extracts it.
	 */
	return (w * 0x01010101) >> 24;
#else
	/* Each 2-bit field becomes the count of bits set in that pair. */
	unsigned int res = w - ((w >> 1) & 0x55555555);
	/* Add neighbouring pairs: each nibble now holds a count 0..4. */
	res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
	/* Add neighbouring nibbles: each byte now holds a count 0..8. */
	res = (res + (res >> 4)) & 0x0F0F0F0F;
	/* Fold the per-byte counts together with shifts and adds. */
	res = res + (res >> 8);
	/* Only the low byte carries the total; mask off the partial sums. */
	return (res + (res >> 16)) & 0x000000FF;
#endif
}
 | 
						|
EXPORT_SYMBOL(__sw_hweight32);
 | 
						|
 | 
						|
/*
 * __sw_hweight16 - software population count of a 16-bit word
 * @w: the word to weigh; only bits 0..15 contribute to the result
 *
 * Returns the number of set bits in the low 16 bits of @w (0..16).
 * Bits above bit 15 are dropped by the masks in the second step, so
 * callers need not clear them beforehand.
 */
unsigned int __sw_hweight16(unsigned int w)
{
	/* Each 2-bit field becomes the count of bits set in that pair. */
	unsigned int res = w - ((w >> 1) & 0x5555);
	/* Add neighbouring pairs: each nibble now holds a count 0..4. */
	res = (res & 0x3333) + ((res >> 2) & 0x3333);
	/* Add neighbouring nibbles: each byte now holds a count 0..8. */
	res = (res + (res >> 4)) & 0x0F0F;
	/* Sum the two byte counts; the total lives in the low byte. */
	return (res + (res >> 8)) & 0x00FF;
}
 | 
						|
EXPORT_SYMBOL(__sw_hweight16);
 | 
						|
 | 
						|
/*
 * __sw_hweight8 - software population count of an 8-bit word
 * @w: the word to weigh; only bits 0..7 contribute to the result
 *
 * Returns the number of set bits in the low 8 bits of @w (0..8).
 * Bits above bit 7 are dropped by the masks in the second step.
 */
unsigned int __sw_hweight8(unsigned int w)
{
	/* Each 2-bit field becomes the count of bits set in that pair. */
	unsigned int res = w - ((w >> 1) & 0x55);
	/* Add neighbouring pairs: each nibble now holds a count 0..4. */
	res = (res & 0x33) + ((res >> 2) & 0x33);
	/* Sum the two nibble counts; the total (at most 8) fits in 4 bits. */
	return (res + (res >> 4)) & 0x0F;
}
 | 
						|
EXPORT_SYMBOL(__sw_hweight8);
 | 
						|
 | 
						|
/*
 * __sw_hweight64 - software population count of a 64-bit word
 * @w: the word to weigh
 *
 * Returns the number of set bits in @w (0..64).
 *
 * When BITS_PER_LONG is 32 the word is split into two halves that are
 * weighed separately with __sw_hweight32(). When it is 64 the reduction
 * is done directly on the full word.
 */
unsigned long __sw_hweight64(__u64 w)
{
#if BITS_PER_LONG == 32
	/* Weigh the high and low 32-bit halves independently. */
	return __sw_hweight32((unsigned int)(w >> 32)) +
	       __sw_hweight32((unsigned int)w);
#elif BITS_PER_LONG == 64
#ifdef ARCH_HAS_FAST_MULTIPLIER
	/* Each 2-bit field becomes the count of bits set in that pair. */
	w -= (w >> 1) & 0x5555555555555555ul;
	/* Add neighbouring pairs: each nibble now holds a count 0..4. */
	w =  (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul);
	/* Add neighbouring nibbles: each byte now holds a count 0..8. */
	w =  (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful;
	/*
	 * Multiplying by 0x0101...01 accumulates the sum of all eight
	 * bytes in the top byte; the shift extracts it.
	 */
	return (w * 0x0101010101010101ul) >> 56;
#else
	/* Each 2-bit field becomes the count of bits set in that pair. */
	__u64 res = w - ((w >> 1) & 0x5555555555555555ul);
	/* Add neighbouring pairs: each nibble now holds a count 0..4. */
	res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
	/* Add neighbouring nibbles: each byte now holds a count 0..8. */
	res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful;
	/* Fold the per-byte counts together with shifts and adds. */
	res = res + (res >> 8);
	res = res + (res >> 16);
	/* Only the low byte carries the total; mask off the partial sums. */
	return (res + (res >> 32)) & 0x00000000000000FFul;
#endif
#endif
}
 | 
						|
EXPORT_SYMBOL(__sw_hweight64);
 |