Mirror of https://github.com/torvalds/linux.git (synced 2025-11-04 02:30:34 +02:00)
Make the architecture-optimized CRC code do its CPU feature checks in
subsys_initcalls instead of arch_initcalls. This makes it consistent with
arch/*/lib/crypto/ and ensures that it runs after initcalls that possibly
could be a prerequisite for kernel-mode FPU, such as x86's
xfd_update_static_branch() and loongarch's init_euen_mask().

Note: as far as I can tell, x86's xfd_update_static_branch() isn't
*actually* needed for kernel-mode FPU. loongarch's init_euen_mask() is
needed to enable save/restore of the vector registers, but loongarch
doesn't yet have any CRC or crypto code that uses vector registers anyway.
Regardless, let's be consistent with arch/*/lib/crypto/ and robust against
any potential future dependency on an arch_initcall.

Link: https://lore.kernel.org/r/20250510035959.87995-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
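For illustration, here is a minimal, hypothetical sketch of the pattern this change applies (the real file follows below). The CPU-feature probe itself is unchanged; only the registration macro moves from arch_initcall() to subsys_initcall(), which runs at a later initcall level and therefore after any arch_initcall that kernel-mode FPU might depend on. The function name is made up for the example.

/* Hypothetical example of the initcall-level change, not the actual patch.
 * subsys_initcalls run after all arch_initcalls, so any arch-level setup
 * that kernel-mode FPU needs has already completed by the time this runs.
 */
#include <linux/init.h>

static int __init crc_example_cpu_probe(void)
{
	/* CPU feature checks and static-key setup would go here. */
	return 0;
}

/* Before: arch_initcall(crc_example_cpu_probe); */
subsys_initcall(crc_example_cpu_probe);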
		
			
				
	
	
		
// SPDX-License-Identifier: GPL-2.0-only
/*
 * x86-optimized CRC32 functions
 *
 * Copyright (C) 2008 Intel Corporation
 * Copyright 2012 Xyratex Technology Limited
 * Copyright 2024 Google LLC
 */

#include <linux/crc32.h>
#include <linux/module.h>
#include "crc-pclmul-template.h"

static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);

DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32);

u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
	CRC_PCLMUL(crc, p, len, crc32_lsb, crc32_lsb_0xedb88320_consts,
		   have_pclmulqdq);
	return crc32_le_base(crc, p, len);
}
EXPORT_SYMBOL(crc32_le_arch);

#ifdef CONFIG_X86_64
#define CRC32_INST "crc32q %1, %q0"
#else
#define CRC32_INST "crc32l %1, %0"
#endif

/*
 * Use carryless multiply version of crc32c when buffer size is >= 512 to
 * account for FPU state save/restore overhead.
 */
#define CRC32C_PCLMUL_BREAKEVEN	512

asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);

u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
{
	size_t num_longs;

	if (!static_branch_likely(&have_crc32))
		return crc32c_base(crc, p, len);

	if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN &&
	    static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
		kernel_fpu_begin();
		crc = crc32c_x86_3way(crc, p, len);
		kernel_fpu_end();
		return crc;
	}

	for (num_longs = len / sizeof(unsigned long);
	     num_longs != 0; num_longs--, p += sizeof(unsigned long))
		asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p));

	if (sizeof(unsigned long) > 4 && (len & 4)) {
		asm("crc32l %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u32 *)p));
		p += 4;
	}
	if (len & 2) {
		asm("crc32w %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u16 *)p));
		p += 2;
	}
	if (len & 1)
		asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p));

	return crc;
}
EXPORT_SYMBOL(crc32c_arch);

u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
{
	return crc32_be_base(crc, p, len);
}
EXPORT_SYMBOL(crc32_be_arch);

static int __init crc32_x86_init(void)
{
	if (boot_cpu_has(X86_FEATURE_XMM4_2))
		static_branch_enable(&have_crc32);
	if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
		static_branch_enable(&have_pclmulqdq);
		INIT_CRC_PCLMUL(crc32_lsb);
	}
	return 0;
}
subsys_initcall(crc32_x86_init);

static void __exit crc32_x86_exit(void)
{
}
module_exit(crc32_x86_exit);

u32 crc32_optimizations(void)
{
	u32 optimizations = 0;

	if (static_key_enabled(&have_crc32))
		optimizations |= CRC32C_OPTIMIZATION;
	if (static_key_enabled(&have_pclmulqdq))
		optimizations |= CRC32_LE_OPTIMIZATION;
	return optimizations;
}
EXPORT_SYMBOL(crc32_optimizations);

MODULE_DESCRIPTION("x86-optimized CRC32 functions");
MODULE_LICENSE("GPL");
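For context, a hedged sketch of how a caller typically reaches this code: kernel users don't call crc32c_arch() or crc32_le_arch() directly, but go through the generic helpers in <linux/crc32.h>, which dispatch to the arch-optimized implementations when they are built in. The module below is a made-up illustration; note that crc32c() is declared in <linux/crc32.h> in recent kernels, while older kernels provided it via <linux/crc32c.h>.

/* Hypothetical usage sketch, not part of this file: checksum a small
 * buffer through the generic CRC32 helpers, which dispatch to the
 * arch-optimized routines above when they are available.
 */
#include <linux/crc32.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/printk.h>

static int __init crc_demo_init(void)
{
	static const u8 buf[] = "example data";
	u32 crc_le = crc32_le(~0U, buf, sizeof(buf) - 1);
	u32 crc_c  = crc32c(~0U, buf, sizeof(buf) - 1);

	pr_info("crc32_le=%#x crc32c=%#x\n", crc_le, crc_c);
	return 0;
}
module_init(crc_demo_init);

static void __exit crc_demo_exit(void)
{
}
module_exit(crc_demo_exit);

MODULE_DESCRIPTION("CRC32 usage sketch");
MODULE_LICENSE("GPL");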