This patch adds support for vector-optimized XOR; it has been tested in QEMU.

Co-developed-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
Tested-by: Björn Töpel <bjorn@rivosinc.com>
Tested-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Link: https://lore.kernel.org/r/20240115055929.4736-4-andy.chiu@sifive.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2021 SiFive
 */
#include <linux/linkage.h>
#include <linux/export.h>
#include <asm/asm.h>
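
/*
 * xor_regs_2_: XOR the source buffer at (a2) into the buffer at (a1);
 * a0 holds the byte count.  Each pass lets vsetvli pick
 * vl = min(a0, VLMAX) byte elements (e8, LMUL=8), XORs one chunk,
 * stores the result back through a1, advances both pointers by vl
 * and loops until a0 reaches zero.
 */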
SYM_FUNC_START(xor_regs_2_)
	vsetvli a3, a0, e8, m8, ta, ma
	vle8.v v0, (a1)
	vle8.v v8, (a2)
	sub a0, a0, a3
	vxor.vv v16, v0, v8
	add a2, a2, a3
	vse8.v v16, (a1)
	add a1, a1, a3
	bnez a0, xor_regs_2_
	ret
SYM_FUNC_END(xor_regs_2_)
EXPORT_SYMBOL(xor_regs_2_)
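
/* As above, but XOR the buffers at (a2) and (a3) into (a1). */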
SYM_FUNC_START(xor_regs_3_)
	vsetvli a4, a0, e8, m8, ta, ma
	vle8.v v0, (a1)
	vle8.v v8, (a2)
	sub a0, a0, a4
	vxor.vv v0, v0, v8
	vle8.v v16, (a3)
	add a2, a2, a4
	vxor.vv v16, v0, v16
	add a3, a3, a4
	vse8.v v16, (a1)
	add a1, a1, a4
	bnez a0, xor_regs_3_
	ret
SYM_FUNC_END(xor_regs_3_)
EXPORT_SYMBOL(xor_regs_3_)
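
/* XOR the buffers at (a2), (a3) and (a4) into (a1). */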
SYM_FUNC_START(xor_regs_4_)
	vsetvli a5, a0, e8, m8, ta, ma
	vle8.v v0, (a1)
	vle8.v v8, (a2)
	sub a0, a0, a5
	vxor.vv v0, v0, v8
	vle8.v v16, (a3)
	add a2, a2, a5
	vxor.vv v0, v0, v16
	vle8.v v24, (a4)
	add a3, a3, a5
	vxor.vv v16, v0, v24
	add a4, a4, a5
	vse8.v v16, (a1)
	add a1, a1, a5
	bnez a0, xor_regs_4_
	ret
SYM_FUNC_END(xor_regs_4_)
EXPORT_SYMBOL(xor_regs_4_)
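
/* XOR the buffers at (a2), (a3), (a4) and (a5) into (a1). */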
SYM_FUNC_START(xor_regs_5_)
	vsetvli a6, a0, e8, m8, ta, ma
	vle8.v v0, (a1)
	vle8.v v8, (a2)
	sub a0, a0, a6
	vxor.vv v0, v0, v8
	vle8.v v16, (a3)
	add a2, a2, a6
	vxor.vv v0, v0, v16
	vle8.v v24, (a4)
	add a3, a3, a6
	vxor.vv v0, v0, v24
	vle8.v v8, (a5)
	add a4, a4, a6
	vxor.vv v16, v0, v8
	add a5, a5, a6
	vse8.v v16, (a1)
	add a1, a1, a6
	bnez a0, xor_regs_5_
	ret
SYM_FUNC_END(xor_regs_5_)
EXPORT_SYMBOL(xor_regs_5_)
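
These routines run with the vector unit live, so kernel callers cannot invoke them directly: they have to be wrapped in a kernel-mode vector context and plugged into the generic XOR template machinery that the RAID code benchmarks at boot. The sketch below is only illustrative glue, not the in-tree header: it assumes the kernel_vector_begin()/kernel_vector_end() helpers introduced alongside this series and the struct xor_block_template from include/linux/raid/xor.h, and the wrapper and template names (xor_vector_2, xor_block_rvv) are hypothetical.

/* Illustrative C glue for the routines above (names are hypothetical). */
#include <linux/raid/xor.h>
#include <asm/vector.h>

void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
		 const unsigned long *__restrict p2);

static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
			 const unsigned long *__restrict p2)
{
	kernel_vector_begin();		/* make the vector unit usable in-kernel */
	xor_regs_2_(bytes, p1, p2);
	kernel_vector_end();		/* hand the vector unit back */
}

/* xor_regs_3_/4_/5_ would be wrapped the same way for .do_3/.do_4/.do_5. */

static struct xor_block_template xor_block_rvv = {
	.name = "rvv",
	.do_2 = xor_vector_2,
	/* .do_3 = ..., .do_4 = ..., .do_5 = ... */
};

An architecture would normally list such a template in its XOR_TRY_TEMPLATES macro so the generic code can time it against the portable implementations and pick the fastest one at boot.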