Merge tag 'x86-asm-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 assembly code updates from Ingo Molnar:
- Micro-optimize the x86 bitops code
- Define target-specific {raw,this}_cpu_try_cmpxchg{64,128}() to
improve code generation
- Define and use raw_cpu_try_cmpxchg() in preempt_count_set()
- Do not clobber %rsi in percpu_{try_,}cmpxchg{64,128}_op
- Remove the unused __sw_hweight64() implementation on x86-32
- Misc fixes and cleanups
* tag 'x86-asm-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/lib: Address kernel-doc warnings
x86/entry: Fix typos in comments
x86/entry: Remove unused argument %rsi passed to exc_nmi()
x86/bitops: Remove unused __sw_hweight64() assembly implementation on x86-32
x86/percpu: Do not clobber %rsi in percpu_{try_,}cmpxchg{64,128}_op
x86/percpu: Use raw_cpu_try_cmpxchg() in preempt_count_set()
x86/percpu: Define raw_cpu_try_cmpxchg and this_cpu_try_cmpxchg()
x86/percpu: Define {raw,this}_cpu_try_cmpxchg{64,128}
x86/asm/bitops: Use __builtin_clz{l|ll} to evaluate constant expressions
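Several of the items above revolve around the try_cmpxchg() pattern: instead of issuing a cmpxchg and then comparing its return value against the expected old value, the "try" form returns a success flag and, on failure, hands back the value actually observed, so a retry loop needs no extra reload or comparison. The following is a rough userspace sketch of that shape using C11 atomics, not kernel code: counter, counter_set(), try_cmpxchg_u32() and flag_mask are hypothetical names invented for this illustration, and this is not the kernel's preempt_count_set() implementation.

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Hypothetical shared counter; a plain C11 atomic stands in for a per-CPU variable. */
    static _Atomic unsigned int counter;

    /*
     * try_cmpxchg-style helper: returns true on success; on failure, *expected
     * is updated with the value actually found, so the caller can retry
     * without re-reading or re-comparing.
     */
    static bool try_cmpxchg_u32(_Atomic unsigned int *ptr,
                                unsigned int *expected, unsigned int desired)
    {
            return atomic_compare_exchange_strong(ptr, expected, desired);
    }

    /*
     * Shape of a retry loop built on the try form: preserve one flag bit,
     * replace the rest of the value.
     */
    void counter_set(unsigned int val, unsigned int flag_mask)
    {
            unsigned int old = atomic_load(&counter);
            unsigned int new;

            do {
                    new = (old & flag_mask) | (val & ~flag_mask);
                    /* On failure, 'old' already holds the current value; just retry. */
            } while (!try_cmpxchg_u32(&counter, &old, new));
    }

On x86 the code-generation win comes from cmpxchg already setting ZF: with the try form the compiler can branch on that flag directly instead of keeping the old value live in a register and re-comparing it against the cmpxchg result.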
arch/x86/lib/hweight.S — 75 lines, 2.2 KiB, x86 assembly
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/export.h>
#include <linux/linkage.h>

#include <asm/asm.h>

/*
 * unsigned int __sw_hweight32(unsigned int w)
 * %rdi: w
 */
SYM_FUNC_START(__sw_hweight32)

#ifdef CONFIG_X86_64
	movl %edi, %eax				# w
#endif
	__ASM_SIZE(push,) %__ASM_REG(dx)
	movl %eax, %edx				# w -> t
	shrl %edx				# t >>= 1
	andl $0x55555555, %edx			# t &= 0x55555555
	subl %edx, %eax				# w -= t

	movl %eax, %edx				# w -> t
	shrl $2, %eax				# w_tmp >>= 2
	andl $0x33333333, %edx			# t &= 0x33333333
	andl $0x33333333, %eax			# w_tmp &= 0x33333333
	addl %edx, %eax				# w = w_tmp + t

	movl %eax, %edx				# w -> t
	shrl $4, %edx				# t >>= 4
	addl %edx, %eax				# w_tmp += t
	andl $0x0f0f0f0f, %eax			# w_tmp &= 0x0f0f0f0f
	imull $0x01010101, %eax, %eax		# w_tmp *= 0x01010101
	shrl $24, %eax				# w = w_tmp >> 24
	__ASM_SIZE(pop,) %__ASM_REG(dx)
	RET
SYM_FUNC_END(__sw_hweight32)
EXPORT_SYMBOL(__sw_hweight32)

/*
 * No 32-bit variant, because it's implemented as an inline wrapper
 * on top of __arch_hweight32():
 */
#ifdef CONFIG_X86_64
SYM_FUNC_START(__sw_hweight64)
	pushq	%rdi
	pushq	%rdx

	movq	%rdi, %rdx			# w -> t
	movabsq	$0x5555555555555555, %rax
	shrq	%rdx				# t >>= 1
	andq	%rdx, %rax			# t &= 0x5555555555555555
	movabsq	$0x3333333333333333, %rdx
	subq	%rax, %rdi			# w -= t

	movq	%rdi, %rax			# w -> t
	shrq	$2, %rdi			# w_tmp >>= 2
	andq	%rdx, %rax			# t &= 0x3333333333333333
	andq	%rdi, %rdx			# w_tmp &= 0x3333333333333333
	addq	%rdx, %rax			# w = w_tmp + t

	movq	%rax, %rdx			# w -> t
	shrq	$4, %rdx			# t >>= 4
	addq	%rdx, %rax			# w_tmp += t
	movabsq	$0x0f0f0f0f0f0f0f0f, %rdx
	andq	%rdx, %rax			# w_tmp &= 0x0f0f0f0f0f0f0f0f
	movabsq	$0x0101010101010101, %rdx
	imulq	%rdx, %rax			# w_tmp *= 0x0101010101010101
	shrq	$56, %rax			# w = w_tmp >> 56

	popq	%rdx
	popq	%rdi
	RET
SYM_FUNC_END(__sw_hweight64)
EXPORT_SYMBOL(__sw_hweight64)
#endif
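For readers who prefer C, here is a reference rendering of the same SWAR (parallel bit-count) steps that __sw_hweight32 performs above. hweight32_ref() is a hypothetical name used only for this sketch, not a kernel interface (the kernel keeps a generic C version in lib/hweight.c for architectures without an assembly implementation); the sketch only mirrors the constants and comments in the assembly: 2-bit, 4-bit and 8-bit partial sums, then a multiply by 0x01010101 that accumulates all byte counts into the top byte.

    #include <stdint.h>

    /* Reference C version of the population count computed by __sw_hweight32 above. */
    static inline unsigned int hweight32_ref(uint32_t w)
    {
            w -= (w >> 1) & 0x55555555;                       /* each 2-bit field: popcount of that bit pair */
            w  = (w & 0x33333333) + ((w >> 2) & 0x33333333);  /* each 4-bit field: popcount of that nibble  */
            w  = (w + (w >> 4)) & 0x0f0f0f0f;                 /* each byte: popcount of that byte           */
            return (w * 0x01010101) >> 24;                    /* sum all byte counts into the top byte      */
    }

The 64-bit variant follows the same steps with the constants widened to 64 bits and a final shift by 56, which is exactly what the __sw_hweight64 assembly above does.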