From 89079520cef65d6da1e864eab4464effe5396e23 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Fri, 11 Apr 2025 10:46:00 +0800 Subject: [PATCH 01/69] RISC-V: vDSO: Wire up getrandom() vDSO implementation Hook up the generic vDSO implementation to the generic vDSO getrandom implementation by providing the required __arch_chacha20_blocks_nostack and getrandom_syscall implementations. Also wire up the selftests. The benchmark result: vdso: 25000000 times in 2.466341333 seconds libc: 25000000 times in 41.447720005 seconds syscall: 25000000 times in 41.043926672 seconds vdso: 25000000 x 256 times in 162.286219353 seconds libc: 25000000 x 256 times in 2953.855018685 seconds syscall: 25000000 x 256 times in 2796.268546000 seconds Signed-off-by: Xi Ruoyao Link: https://lore.kernel.org/r/20250411024600.16045-1-xry111@xry111.site Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/vdso/getrandom.h | 30 +++ arch/riscv/kernel/vdso/Makefile | 12 + arch/riscv/kernel/vdso/getrandom.c | 10 + arch/riscv/kernel/vdso/vdso.lds.S | 1 + arch/riscv/kernel/vdso/vgetrandom-chacha.S | 244 ++++++++++++++++++ .../selftests/vDSO/vgetrandom-chacha.S | 2 + 7 files changed, 300 insertions(+) create mode 100644 arch/riscv/include/asm/vdso/getrandom.h create mode 100644 arch/riscv/kernel/vdso/getrandom.c create mode 100644 arch/riscv/kernel/vdso/vgetrandom-chacha.S diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bbec87b79309..fbca724302ab 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -218,6 +218,7 @@ config RISCV select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT select UACCESS_MEMCPY if !MMU + select VDSO_GETRANDOM if HAVE_GENERIC_VDSO select USER_STACKTRACE_SUPPORT select ZONE_DMA32 if 64BIT diff --git a/arch/riscv/include/asm/vdso/getrandom.h b/arch/riscv/include/asm/vdso/getrandom.h new file mode 100644 index 000000000000..8dc92441702a --- /dev/null +++ b/arch/riscv/include/asm/vdso/getrandom.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025 Xi Ruoyao . All Rights Reserved. 
+ */ +#ifndef __ASM_VDSO_GETRANDOM_H +#define __ASM_VDSO_GETRANDOM_H + +#ifndef __ASSEMBLY__ + +#include + +static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags) +{ + register long ret asm("a0"); + register long nr asm("a7") = __NR_getrandom; + register void *buffer asm("a0") = _buffer; + register size_t len asm("a1") = _len; + register unsigned int flags asm("a2") = _flags; + + asm volatile ("ecall\n" + : "+r" (ret) + : "r" (nr), "r" (buffer), "r" (len), "r" (flags) + : "memory"); + + return ret; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index ad73607abc28..7575ef088adc 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -13,9 +13,17 @@ vdso-syms += flush_icache vdso-syms += hwprobe vdso-syms += sys_hwprobe +ifdef CONFIG_VDSO_GETRANDOM +vdso-syms += getrandom +endif + # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o +ifdef CONFIG_VDSO_GETRANDOM +obj-vdso += vgetrandom-chacha.o +endif + ccflags-y := -fno-stack-protector ccflags-y += -DDISABLE_BRANCH_PROFILING ccflags-y += -fno-builtin @@ -24,6 +32,10 @@ ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) endif +ifneq ($(c-getrandom-y),) + CFLAGS_getrandom.o += -fPIC -include $(c-getrandom-y) +endif + CFLAGS_hwprobe.o += -fPIC # Build rules diff --git a/arch/riscv/kernel/vdso/getrandom.c b/arch/riscv/kernel/vdso/getrandom.c new file mode 100644 index 000000000000..f21922e8cebd --- /dev/null +++ b/arch/riscv/kernel/vdso/getrandom.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Xi Ruoyao . All Rights Reserved. + */ +#include + +ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) +{ + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); +} diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 8e86965a8aae..abc69cda0445 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -80,6 +80,7 @@ VERSION #ifndef COMPAT_VDSO __vdso_riscv_hwprobe; #endif + __vdso_getrandom; local: *; }; } diff --git a/arch/riscv/kernel/vdso/vgetrandom-chacha.S b/arch/riscv/kernel/vdso/vgetrandom-chacha.S new file mode 100644 index 000000000000..d793cadc78a6 --- /dev/null +++ b/arch/riscv/kernel/vdso/vgetrandom-chacha.S @@ -0,0 +1,244 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025 Xi Ruoyao . All Rights Reserved. + * + * Based on arch/loongarch/vdso/vgetrandom-chacha.S. 
+ */ + +#include +#include + +.text + +.macro ROTRI rd rs imm + slliw t0, \rs, 32 - \imm + srliw \rd, \rs, \imm + or \rd, \rd, t0 +.endm + +.macro OP_4REG op d0 d1 d2 d3 s0 s1 s2 s3 + \op \d0, \d0, \s0 + \op \d1, \d1, \s1 + \op \d2, \d2, \s2 + \op \d3, \d3, \s3 +.endm + +/* + * a0: output bytes + * a1: 32-byte key input + * a2: 8-byte counter input/output + * a3: number of 64-byte blocks to write to output + */ +SYM_FUNC_START(__arch_chacha20_blocks_nostack) + +#define output a0 +#define key a1 +#define counter a2 +#define nblocks a3 +#define i a4 +#define state0 s0 +#define state1 s1 +#define state2 s2 +#define state3 s3 +#define state4 s4 +#define state5 s5 +#define state6 s6 +#define state7 s7 +#define state8 s8 +#define state9 s9 +#define state10 s10 +#define state11 s11 +#define state12 a5 +#define state13 a6 +#define state14 a7 +#define state15 t1 +#define cnt t2 +#define copy0 t3 +#define copy1 t4 +#define copy2 t5 +#define copy3 t6 + +/* Packs to be used with OP_4REG */ +#define line0 state0, state1, state2, state3 +#define line1 state4, state5, state6, state7 +#define line2 state8, state9, state10, state11 +#define line3 state12, state13, state14, state15 + +#define line1_perm state5, state6, state7, state4 +#define line2_perm state10, state11, state8, state9 +#define line3_perm state15, state12, state13, state14 + +#define copy copy0, copy1, copy2, copy3 + +#define _16 16, 16, 16, 16 +#define _20 20, 20, 20, 20 +#define _24 24, 24, 24, 24 +#define _25 25, 25, 25, 25 + + addi sp, sp, -12*SZREG + REG_S s0, (sp) + REG_S s1, SZREG(sp) + REG_S s2, 2*SZREG(sp) + REG_S s3, 3*SZREG(sp) + REG_S s4, 4*SZREG(sp) + REG_S s5, 5*SZREG(sp) + REG_S s6, 6*SZREG(sp) + REG_S s7, 7*SZREG(sp) + REG_S s8, 8*SZREG(sp) + REG_S s9, 9*SZREG(sp) + REG_S s10, 10*SZREG(sp) + REG_S s11, 11*SZREG(sp) + + ld cnt, (counter) + + li copy0, 0x61707865 + li copy1, 0x3320646e + li copy2, 0x79622d32 + li copy3, 0x6b206574 + +.Lblock: + /* state[0,1,2,3] = "expand 32-byte k" */ + mv state0, copy0 + mv state1, copy1 + mv state2, copy2 + mv state3, copy3 + + /* state[4,5,..,11] = key */ + lw state4, (key) + lw state5, 4(key) + lw state6, 8(key) + lw state7, 12(key) + lw state8, 16(key) + lw state9, 20(key) + lw state10, 24(key) + lw state11, 28(key) + + /* state[12,13] = counter */ + mv state12, cnt + srli state13, cnt, 32 + + /* state[14,15] = 0 */ + mv state14, zero + mv state15, zero + + li i, 10 +.Lpermute: + /* odd round */ + OP_4REG addw line0, line1 + OP_4REG xor line3, line0 + OP_4REG ROTRI line3, _16 + + OP_4REG addw line2, line3 + OP_4REG xor line1, line2 + OP_4REG ROTRI line1, _20 + + OP_4REG addw line0, line1 + OP_4REG xor line3, line0 + OP_4REG ROTRI line3, _24 + + OP_4REG addw line2, line3 + OP_4REG xor line1, line2 + OP_4REG ROTRI line1, _25 + + /* even round */ + OP_4REG addw line0, line1_perm + OP_4REG xor line3_perm, line0 + OP_4REG ROTRI line3_perm, _16 + + OP_4REG addw line2_perm, line3_perm + OP_4REG xor line1_perm, line2_perm + OP_4REG ROTRI line1_perm, _20 + + OP_4REG addw line0, line1_perm + OP_4REG xor line3_perm, line0 + OP_4REG ROTRI line3_perm, _24 + + OP_4REG addw line2_perm, line3_perm + OP_4REG xor line1_perm, line2_perm + OP_4REG ROTRI line1_perm, _25 + + addi i, i, -1 + bnez i, .Lpermute + + /* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */ + OP_4REG addw line0, copy + sw state0, (output) + sw state1, 4(output) + sw state2, 8(output) + sw state3, 12(output) + + /* from now on state[0,1,2,3] are scratch registers */ + + /* state[0,1,2,3] = lo(key) */ + lw state0, (key) + lw state1, 
4(key) + lw state2, 8(key) + lw state3, 12(key) + + /* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */ + OP_4REG addw line1, line0 + sw state4, 16(output) + sw state5, 20(output) + sw state6, 24(output) + sw state7, 28(output) + + /* state[0,1,2,3] = hi(key) */ + lw state0, 16(key) + lw state1, 20(key) + lw state2, 24(key) + lw state3, 28(key) + + /* output[8,9,10,11] = tmp[0,1,2,3] + state[8,9,10,11] */ + OP_4REG addw line2, line0 + sw state8, 32(output) + sw state9, 36(output) + sw state10, 40(output) + sw state11, 44(output) + + /* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */ + addw state12, state12, cnt + srli state0, cnt, 32 + addw state13, state13, state0 + sw state12, 48(output) + sw state13, 52(output) + sw state14, 56(output) + sw state15, 60(output) + + /* ++counter */ + addi cnt, cnt, 1 + + /* output += 64 */ + addi output, output, 64 + /* --nblocks */ + addi nblocks, nblocks, -1 + bnez nblocks, .Lblock + + /* counter = [cnt_lo, cnt_hi] */ + sd cnt, (counter) + + /* Zero out the potentially sensitive regs, in case nothing uses these + * again. As at now copy[0,1,2,3] just contains "expand 32-byte k" and + * state[0,...,11] are s0-s11 those we'll restore in the epilogue, we + * only need to zero state[12,...,15]. + */ + mv state12, zero + mv state13, zero + mv state14, zero + mv state15, zero + + REG_L s0, (sp) + REG_L s1, SZREG(sp) + REG_L s2, 2*SZREG(sp) + REG_L s3, 3*SZREG(sp) + REG_L s4, 4*SZREG(sp) + REG_L s5, 5*SZREG(sp) + REG_L s6, 6*SZREG(sp) + REG_L s7, 7*SZREG(sp) + REG_L s8, 8*SZREG(sp) + REG_L s9, 9*SZREG(sp) + REG_L s10, 10*SZREG(sp) + REG_L s11, 11*SZREG(sp) + addi sp, sp, 12*SZREG + + ret +SYM_FUNC_END(__arch_chacha20_blocks_nostack) diff --git a/tools/testing/selftests/vDSO/vgetrandom-chacha.S b/tools/testing/selftests/vDSO/vgetrandom-chacha.S index d6e09af7c0a9..a4a82e1c28a9 100644 --- a/tools/testing/selftests/vDSO/vgetrandom-chacha.S +++ b/tools/testing/selftests/vDSO/vgetrandom-chacha.S @@ -11,6 +11,8 @@ #include "../../../../arch/loongarch/vdso/vgetrandom-chacha.S" #elif defined(__powerpc__) || defined(__powerpc64__) #include "../../../../arch/powerpc/kernel/vdso/vgetrandom-chacha.S" +#elif defined(__riscv) && __riscv_xlen == 64 +#include "../../../../arch/riscv/kernel/vdso/vgetrandom-chacha.S" #elif defined(__s390x__) #include "../../../../arch/s390/kernel/vdso64/vgetrandom-chacha.S" #elif defined(__x86_64__) From bafa451a96d0f1404aa1a5a267f78767a55fac71 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 15 Apr 2025 21:58:31 +0930 Subject: [PATCH 02/69] riscv: defconfig: Remove EXPERT The setting of EXPERT is a leftover from when the riscv defconfig was first added. As mentioned in the EXPERT Kconfig help text it is not intended to be set in the usual case. Upon removal a bunch of intrusive debug-related kernel options are no longer set, which is good. A few may want to come back in the future but let those be advocated for on a case by case basis. NAMESPACES, SYSFS_SYSCALL and MEDIA_SUPPORT_FILTER default on and thus fall out of the defconfig. Set VIDEO_CADENCE_CSI2RX=y to ensure VIDEO_CADENCE_CSI2RX stays enabled. Set DEBUG_KERNEL=y in line with other arch defconfigs. This turns on tracing. 
Signed-off-by: Joel Stanley Link: https://lore.kernel.org/r/20250415122832.982610-1-joel@jms.id.au Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 3c8e16d71e17..286f490ead37 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -18,12 +18,9 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_CGROUP_BPF=y -CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_ARCH_MICROCHIP=y CONFIG_ARCH_SIFIVE=y @@ -181,6 +178,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_AXP20X=y CONFIG_REGULATOR_GPIO=y CONFIG_MEDIA_SUPPORT=m +CONFIG_MEDIA_PLATFORM_SUPPORT=y CONFIG_VIDEO_CADENCE_CSI2RX=m CONFIG_DRM=m CONFIG_DRM_RADEON=m @@ -294,25 +292,7 @@ CONFIG_DEFAULT_SECURITY_DAC=y CONFIG_CRYPTO_USER_API_HASH=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_PAGEALLOC=y -CONFIG_SCHED_STACK_END_CHECK=y -CONFIG_DEBUG_VM=y -CONFIG_DEBUG_VM_PGFLAGS=y -CONFIG_DEBUG_MEMORY_INIT=y -CONFIG_DEBUG_PER_CPU_MAPS=y -CONFIG_SOFTLOCKUP_DETECTOR=y -CONFIG_WQ_WATCHDOG=y -CONFIG_DEBUG_RT_MUTEXES=y -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_MUTEXES=y -CONFIG_DEBUG_RWSEMS=y -CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_LIST=y -CONFIG_DEBUG_PLIST=y -CONFIG_DEBUG_SG=y -# CONFIG_RCU_TRACE is not set -CONFIG_RCU_EQS_DEBUG=y -# CONFIG_FTRACE is not set # CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_MEMTEST=y From 61a74ad254628ccd9e88838c3c622885dfb6c588 Mon Sep 17 00:00:00 2001 From: Nylon Chen Date: Fri, 11 Apr 2025 15:38:50 +0800 Subject: [PATCH 03/69] riscv: misaligned: fix sleeping function called during misaligned access handling Use copy_from_user_nofault() and copy_to_user_nofault() instead of copy_from/to_user functions in the misaligned access trap handlers. 
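As a hedged sketch (an illustrative helper, not the handler code itself), the difference is that the _nofault variants disable page-fault handling for the duration of the access and fail instead of sleeping, which is what a trap handler running with interrupts disabled requires:

	/*
	 * Minimal sketch, assuming a context where sleeping is forbidden:
	 * copy_from_user() may sleep to service a page fault, while
	 * copy_from_user_nofault() returns -EFAULT atomically instead.
	 */
	static int fetch_user_data(void *val, unsigned long addr, size_t len)
	{
		if (copy_from_user_nofault(val, (const void __user *)addr, len))
			return -1;	/* fault cannot be serviced here */
		return 0;
	}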
The following bug report was found when executing misaligned memory accesses: BUG: sleeping function called from invalid context at ./include/linux/uaccess.h:162 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 115, name: two preempt_count: 0, expected: 0 CPU: 0 UID: 0 PID: 115 Comm: two Not tainted 6.14.0-rc5 #24 Hardware name: riscv-virtio,qemu (DT) Call Trace: [] dump_backtrace+0x1c/0x24 [] show_stack+0x28/0x34 [] dump_stack_lvl+0x4a/0x68 [] dump_stack+0x14/0x1c [] __might_resched+0xfa/0x104 [] __might_sleep+0x3e/0x62 [] __might_fault+0x1c/0x24 [] _copy_from_user+0x28/0xaa [] handle_misaligned_store+0x204/0x254 [] do_trap_store_misaligned+0x24/0xee [] handle_exception+0x146/0x152 Fixes: b686ecdeacf6 ("riscv: misaligned: Restrict user access to kernel memory") Fixes: 441381506ba7 ("riscv: misaligned: remove CONFIG_RISCV_M_MODE specific code") Signed-off-by: Zong Li Signed-off-by: Nylon Chen Link: https://lore.kernel.org/r/20250411073850.3699180-3-nylon.chen@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps_misaligned.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 4354c87c0376..0173ed80818c 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -441,7 +441,7 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs) val.data_u64 = 0; if (user_mode(regs)) { - if (copy_from_user(&val, (u8 __user *)addr, len)) + if (copy_from_user_nofault(&val, (u8 __user *)addr, len)) return -1; } else { memcpy(&val, (u8 *)addr, len); @@ -539,7 +539,7 @@ static int handle_scalar_misaligned_store(struct pt_regs *regs) return -EOPNOTSUPP; if (user_mode(regs)) { - if (copy_to_user((u8 __user *)addr, &val, len)) + if (copy_to_user_nofault((u8 __user *)addr, &val, len)) return -1; } else { memcpy((u8 *)addr, &val, len); From 7b30b1b04e0d04e56e848c3b9c2952c30de05af9 Mon Sep 17 00:00:00 2001 From: Nylon Chen Date: Fri, 11 Apr 2025 15:38:49 +0800 Subject: [PATCH 04/69] riscv: misaligned: Add handling for ZCB instructions Add support for the Zcb extension's compressed half-word instructions (C.LHU, C.LH, and C.SH) in the RISC-V misaligned access trap handler. Signed-off-by: Zong Li Signed-off-by: Nylon Chen Link: https://lore.kernel.org/r/20250411073850.3699180-2-nylon.chen@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps_misaligned.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 0173ed80818c..7443632445d4 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -88,6 +88,13 @@ #define INSN_MATCH_C_FSWSP 0xe002 #define INSN_MASK_C_FSWSP 0xe003 +#define INSN_MATCH_C_LHU 0x8400 +#define INSN_MASK_C_LHU 0xfc43 +#define INSN_MATCH_C_LH 0x8440 +#define INSN_MASK_C_LH 0xfc43 +#define INSN_MATCH_C_SH 0x8c00 +#define INSN_MASK_C_SH 0xfc43 + #define INSN_LEN(insn) ((((insn) & 0x3) < 0x3) ? 
2 : 4) #if defined(CONFIG_64BIT) @@ -431,6 +438,13 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs) fp = 1; len = 4; #endif + } else if ((insn & INSN_MASK_C_LHU) == INSN_MATCH_C_LHU) { + len = 2; + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_LH) == INSN_MATCH_C_LH) { + len = 2; + shift = 8 * (sizeof(ulong) - len); + insn = RVC_RS2S(insn) << SH_RD; } else { regs->epc = epc; return -1; @@ -530,6 +544,9 @@ static int handle_scalar_misaligned_store(struct pt_regs *regs) len = 4; val.data_ulong = GET_F32_RS2C(insn, regs); #endif + } else if ((insn & INSN_MASK_C_SH) == INSN_MATCH_C_SH) { + len = 2; + val.data_ulong = GET_RS2S(insn, regs); } else { regs->epc = epc; return -1; From 788aa64c01f1262310b4c1fb827a36df170d86ea Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 10 Apr 2025 07:05:22 +0000 Subject: [PATCH 05/69] riscv: save the SR_SUM status over switches When threads/tasks are switched we need to ensure the old execution's SR_SUM state is saved and the new thread has the old SR_SUM state restored. The issue was seen under heavy load especially with the syz-stress tool running, with crashes as follows in schedule_tail: Unable to handle kernel access to user memory without uaccess routines at virtual address 000000002749f0d0 Oops [#1] Modules linked in: CPU: 1 PID: 4875 Comm: syz-executor.0 Not tainted 5.12.0-rc2-syzkaller-00467-g0d7588ab9ef9 #0 Hardware name: riscv-virtio,qemu (DT) epc : schedule_tail+0x72/0xb2 kernel/sched/core.c:4264 ra : task_pid_vnr include/linux/sched.h:1421 [inline] ra : schedule_tail+0x70/0xb2 kernel/sched/core.c:4264 epc : ffffffe00008c8b0 ra : ffffffe00008c8ae sp : ffffffe025d17ec0 gp : ffffffe005d25378 tp : ffffffe00f0d0000 t0 : 0000000000000000 t1 : 0000000000000001 t2 : 00000000000f4240 s0 : ffffffe025d17ee0 s1 : 000000002749f0d0 a0 : 000000000000002a a1 : 0000000000000003 a2 : 1ffffffc0cfac500 a3 : ffffffe0000c80cc a4 : 5ae9db91c19bbe00 a5 : 0000000000000000 a6 : 0000000000f00000 a7 : ffffffe000082eba s2 : 0000000000040000 s3 : ffffffe00eef96c0 s4 : ffffffe022c77fe0 s5 : 0000000000004000 s6 : ffffffe067d74e00 s7 : ffffffe067d74850 s8 : ffffffe067d73e18 s9 : ffffffe067d74e00 s10: ffffffe00eef96e8 s11: 000000ae6cdf8368 t3 : 5ae9db91c19bbe00 t4 : ffffffc4043cafb2 t5 : ffffffc4043cafba t6 : 0000000000040000 status: 0000000000000120 badaddr: 000000002749f0d0 cause: 000000000000000f Call Trace: [] schedule_tail+0x72/0xb2 kernel/sched/core.c:4264 [] ret_from_exception+0x0/0x14 Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace b5f8f9231dc87dda ]--- The issue comes from the put_user() in schedule_tail (kernel/sched/core.c) doing the following: asmlinkage __visible void schedule_tail(struct task_struct *prev) { ... if (current->set_child_tid) put_user(task_pid_vnr(current), current->set_child_tid); ... } the put_user() macro causes the code sequence to come out as follows: 1: __enable_user_access() 2: reg = task_pid_vnr(current); 3: *current->set_child_tid = reg; 4: __disable_user_access() The problem is that we may have a sleeping function as argument which could clear SR_SUM causing the panic above. 
This was fixed by evaluating the argument of the put_user() macro outside the user-enabled section in commit 285a76bb2cf5 ("riscv: evaluate put_user() arg before enabling user access")" In order for riscv to take advantage of unsafe_get/put_XXX() macros and to avoid the same issue we had with put_user() and sleeping functions we must ensure code flow can go through switch_to() from within a region of code with SR_SUM enabled and come back with SR_SUM still enabled. This patch addresses the problem allowing future work to enable full use of unsafe_get/put_XXX() macros without needing to take a CSR bit flip cost on every access. Make switch_to() save and restore SR_SUM. Reported-by: syzbot+e74b94fe601ab9552d69@syzkaller.appspotmail.com Signed-off-by: Ben Dooks Signed-off-by: Cyril Bur Reviewed-by: Alexandre Ghiti Reviewed-by: Deepak Gupta Link: https://lore.kernel.org/r/20250410070526.3160847-2-cyrilbur@tenstorrent.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/processor.h | 1 + arch/riscv/kernel/asm-offsets.c | 5 +++++ arch/riscv/kernel/entry.S | 8 ++++++++ 3 files changed, 14 insertions(+) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 5f56eb9d114a..58fd11c89fe9 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -103,6 +103,7 @@ struct thread_struct { struct __riscv_d_ext_state fstate; unsigned long bad_cause; unsigned long envcfg; + unsigned long status; u32 riscv_v_flags; u32 vstate_ctrl; struct __riscv_v_ext_state vstate; diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index 16490755304e..969c65b1fe41 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -34,6 +34,7 @@ void asm_offsets(void) OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]); OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]); OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]); + OFFSET(TASK_THREAD_STATUS, task_struct, thread.status); OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu); OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); @@ -346,6 +347,10 @@ void asm_offsets(void) offsetof(struct task_struct, thread.s[11]) - offsetof(struct task_struct, thread.ra) ); + DEFINE(TASK_THREAD_STATUS_RA, + offsetof(struct task_struct, thread.status) + - offsetof(struct task_struct, thread.ra) + ); DEFINE(TASK_THREAD_F0_F0, offsetof(struct task_struct, thread.fstate.f[0]) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 33a5a9f2a0d4..00bd0de9faa2 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -397,9 +397,17 @@ SYM_FUNC_START(__switch_to) REG_S s9, TASK_THREAD_S9_RA(a3) REG_S s10, TASK_THREAD_S10_RA(a3) REG_S s11, TASK_THREAD_S11_RA(a3) + + /* save the user space access flag */ + li s0, SR_SUM + csrr s1, CSR_STATUS + REG_S s1, TASK_THREAD_STATUS_RA(a3) + /* Save the kernel shadow call stack pointer */ scs_save_current /* Restore context from next->thread */ + REG_L s0, TASK_THREAD_STATUS_RA(a4) + csrs CSR_STATUS, s0 REG_L ra, TASK_THREAD_RA_RA(a4) REG_L sp, TASK_THREAD_SP_RA(a4) REG_L s0, TASK_THREAD_S0_RA(a4) From 19500c6dbc5c348564a6513c801ab0889300565a Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 10 Apr 2025 07:05:23 +0000 Subject: [PATCH 06/69] riscv: implement user_access_begin() and families Currently, when a function like strncpy_from_user() is called, the userspace access protection is disabled and enabled for every word read. 
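As a hedged illustration of the batched pattern this enables (a sketch using the accessors added below; the helper and label names are arbitrary):

	static long read_two_words(u32 *dst, const u32 __user *src)
	{
		if (!user_access_begin(src, 2 * sizeof(u32)))
			return -EFAULT;
		/* the protection stays disabled across both accesses */
		unsafe_get_user(dst[0], &src[0], Efault);
		unsafe_get_user(dst[1], &src[1], Efault);
		user_access_end();
		return 0;
	Efault:
		user_access_end();
		return -EFAULT;
	}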
By implementing user_access_begin() and families, the protection is disabled at the beginning of the copy and enabled at the end. The __inttype macro is borrowed from x86 implementation. Signed-off-by: Jisheng Zhang Signed-off-by: Cyril Bur Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250410070526.3160847-3-cyrilbur@tenstorrent.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/uaccess.h | 76 ++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index fee56b0c8058..c9a461467bf4 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -61,6 +61,19 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, unsigne #define __disable_user_access() \ __asm__ __volatile__ ("csrc sstatus, %0" : : "r" (SR_SUM) : "memory") +/* + * This is the smallest unsigned integer type that can fit a value + * (up to 'long long') + */ +#define __inttype(x) __typeof__( \ + __typefits(x, char, \ + __typefits(x, short, \ + __typefits(x, int, \ + __typefits(x, long, 0ULL))))) + +#define __typefits(x, type, not) \ + __builtin_choose_expr(sizeof(x) <= sizeof(type), (unsigned type)0, not) + /* * The exception table consists of pairs of addresses: the first is the * address of an instruction that is allowed to fault, and the second is @@ -368,6 +381,69 @@ do { \ goto err_label; \ } while (0) +static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) +{ + if (unlikely(!access_ok(ptr, len))) + return 0; + __enable_user_access(); + return 1; +} +#define user_access_begin user_access_begin +#define user_access_end __disable_user_access + +static inline unsigned long user_access_save(void) { return 0UL; } +static inline void user_access_restore(unsigned long enabled) { } + +/* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. 
+ */ +#define unsafe_put_user(x, ptr, label) do { \ + long __err = 0; \ + __put_user_nocheck(x, (ptr), __err); \ + if (__err) \ + goto label; \ +} while (0) + +#define unsafe_get_user(x, ptr, label) do { \ + long __err = 0; \ + __inttype(*(ptr)) __gu_val; \ + __get_user_nocheck(__gu_val, (ptr), __err); \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ + if (__err) \ + goto label; \ +} while (0) + +#define unsafe_copy_loop(dst, src, len, type, op, label) \ + while (len >= sizeof(type)) { \ + op(*(type *)(src), (type __user *)(dst), label); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ + } + +#define unsafe_copy_to_user(_dst, _src, _len, label) \ +do { \ + char __user *__ucu_dst = (_dst); \ + const char *__ucu_src = (_src); \ + size_t __ucu_len = (_len); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, unsafe_put_user, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, unsafe_put_user, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, unsafe_put_user, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, unsafe_put_user, label); \ +} while (0) + +#define unsafe_copy_from_user(_dst, _src, _len, label) \ +do { \ + char *__ucu_dst = (_dst); \ + const char __user *__ucu_src = (_src); \ + size_t __ucu_len = (_len); \ + unsafe_copy_loop(__ucu_src, __ucu_dst, __ucu_len, u64, unsafe_get_user, label); \ + unsafe_copy_loop(__ucu_src, __ucu_dst, __ucu_len, u32, unsafe_get_user, label); \ + unsafe_copy_loop(__ucu_src, __ucu_dst, __ucu_len, u16, unsafe_get_user, label); \ + unsafe_copy_loop(__ucu_src, __ucu_dst, __ucu_len, u8, unsafe_get_user, label); \ +} while (0) + #else /* CONFIG_MMU */ #include #endif /* CONFIG_MMU */ From 62135bf660b2c3887e22f33d3adbefedb4dc9c7a Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 10 Apr 2025 07:05:24 +0000 Subject: [PATCH 07/69] riscv: uaccess: use input constraints for ptr of __put_user() Putting ptr in the inputs as opposed to output may seem incorrect but this is done for a few reasons: - Not having it in the output permits the use of asm goto in a subsequent patch. There are bugs in gcc [1] which would otherwise prevent it. - Since the output memory is userspace there isn't any real benefit from telling the compiler about the memory clobber. - x86, arm and powerpc all use this technique. 
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 # 1 Signed-off-by: Jisheng Zhang [Cyril Bur: Rewritten commit message] Signed-off-by: Cyril Bur Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250410070526.3160847-4-cyrilbur@tenstorrent.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/uaccess.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index c9a461467bf4..da36057847f0 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -219,11 +219,11 @@ do { \ __typeof__(*(ptr)) __x = x; \ __asm__ __volatile__ ( \ "1:\n" \ - " " insn " %z2, %1\n" \ + " " insn " %z1, %2\n" \ "2:\n" \ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %0) \ - : "+r" (err), "=m" (*(ptr)) \ - : "rJ" (__x)); \ + : "+r" (err) \ + : "rJ" (__x), "m"(*(ptr))); \ } while (0) #ifdef CONFIG_64BIT @@ -236,16 +236,16 @@ do { \ u64 __x = (__typeof__((x)-(x)))(x); \ __asm__ __volatile__ ( \ "1:\n" \ - " sw %z3, %1\n" \ + " sw %z1, %3\n" \ "2:\n" \ - " sw %z4, %2\n" \ + " sw %z2, %4\n" \ "3:\n" \ _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) \ _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) \ - : "+r" (err), \ - "=m" (__ptr[__LSW]), \ - "=m" (__ptr[__MSW]) \ - : "rJ" (__x), "rJ" (__x >> 32)); \ + : "+r" (err) \ + : "rJ" (__x), "rJ" (__x >> 32), \ + "m" (__ptr[__LSW]), \ + "m" (__ptr[__MSW])); \ } while (0) #endif /* CONFIG_64BIT */ From cdf647e817143c9762c5bdf724ca2821a171f011 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 10 Apr 2025 07:05:25 +0000 Subject: [PATCH 08/69] riscv: uaccess: use 'asm goto' for put_user() With 'asm goto' we don't need to test the error etc, the exception just jumps to the error handling directly. Because there are no output clobbers which could trigger gcc bugs [1] the use of asm_goto_output() macro is not necessary here. Not using asm_goto_output() is desirable as the generated output asm will be cleaner. Use of the volatile keyword is redundant as per gcc 14.2.0 manual section 6.48.2.7 Goto Labels: > Also note that an asm goto statement is always implicitly considered volatile. 
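The resulting shape, as a hedged sketch (condensed from the __put_user_asm() change below, 64-bit store case only):

	#define put_word_nofault(x, ptr, label)			\
		asm goto("1:\n"					\
			 "	sd %z0, %1\n"			\
			 _ASM_EXTABLE(1b, %l2)			\
			 : : "rJ" (x), "m" (*(ptr)) : : label)

On a fault, the exception table entry transfers control straight to 'label', so the fast path neither writes nor tests an error flag.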
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 # 1 Signed-off-by: Jisheng Zhang [Cyril Bur: Rewritten commit message] Signed-off-by: Cyril Bur Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250410070526.3160847-5-cyrilbur@tenstorrent.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/uaccess.h | 71 +++++++++++++++----------------- 1 file changed, 33 insertions(+), 38 deletions(-) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index da36057847f0..719c9179a751 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -214,61 +214,66 @@ do { \ ((x) = (__force __typeof__(x))0, -EFAULT); \ }) -#define __put_user_asm(insn, x, ptr, err) \ +#define __put_user_asm(insn, x, ptr, label) \ do { \ __typeof__(*(ptr)) __x = x; \ - __asm__ __volatile__ ( \ + asm goto( \ "1:\n" \ - " " insn " %z1, %2\n" \ - "2:\n" \ - _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %0) \ - : "+r" (err) \ - : "rJ" (__x), "m"(*(ptr))); \ + " " insn " %z0, %1\n" \ + _ASM_EXTABLE(1b, %l2) \ + : : "rJ" (__x), "m"(*(ptr)) : : label); \ } while (0) #ifdef CONFIG_64BIT -#define __put_user_8(x, ptr, err) \ - __put_user_asm("sd", x, ptr, err) +#define __put_user_8(x, ptr, label) \ + __put_user_asm("sd", x, ptr, label) #else /* !CONFIG_64BIT */ -#define __put_user_8(x, ptr, err) \ +#define __put_user_8(x, ptr, label) \ do { \ u32 __user *__ptr = (u32 __user *)(ptr); \ u64 __x = (__typeof__((x)-(x)))(x); \ - __asm__ __volatile__ ( \ + asm goto( \ "1:\n" \ - " sw %z1, %3\n" \ + " sw %z0, %2\n" \ "2:\n" \ - " sw %z2, %4\n" \ - "3:\n" \ - _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) \ - _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) \ - : "+r" (err) \ - : "rJ" (__x), "rJ" (__x >> 32), \ + " sw %z1, %3\n" \ + _ASM_EXTABLE(1b, %l4) \ + _ASM_EXTABLE(2b, %l4) \ + : : "rJ" (__x), "rJ" (__x >> 32), \ "m" (__ptr[__LSW]), \ - "m" (__ptr[__MSW])); \ + "m" (__ptr[__MSW]) : : label); \ } while (0) #endif /* CONFIG_64BIT */ -#define __put_user_nocheck(x, __gu_ptr, __pu_err) \ +#define __put_user_nocheck(x, __gu_ptr, label) \ do { \ switch (sizeof(*__gu_ptr)) { \ case 1: \ - __put_user_asm("sb", (x), __gu_ptr, __pu_err); \ + __put_user_asm("sb", (x), __gu_ptr, label); \ break; \ case 2: \ - __put_user_asm("sh", (x), __gu_ptr, __pu_err); \ + __put_user_asm("sh", (x), __gu_ptr, label); \ break; \ case 4: \ - __put_user_asm("sw", (x), __gu_ptr, __pu_err); \ + __put_user_asm("sw", (x), __gu_ptr, label); \ break; \ case 8: \ - __put_user_8((x), __gu_ptr, __pu_err); \ + __put_user_8((x), __gu_ptr, label); \ break; \ default: \ BUILD_BUG(); \ } \ } while (0) +#define __put_user_error(x, ptr, err) \ +do { \ + __label__ err_label; \ + __put_user_nocheck(x, ptr, err_label); \ + break; \ +err_label: \ + (err) = -EFAULT; \ +} while (0) + /** * __put_user: - Write a simple value into user space, with less checking. * @x: Value to copy to user space. 
@@ -299,7 +304,7 @@ do { \ __chk_user_ptr(__gu_ptr); \ \ __enable_user_access(); \ - __put_user_nocheck(__val, __gu_ptr, __pu_err); \ + __put_user_error(__val, __gu_ptr, __pu_err); \ __disable_user_access(); \ \ __pu_err; \ @@ -373,13 +378,7 @@ do { \ } while (0) #define __put_kernel_nofault(dst, src, type, err_label) \ -do { \ - long __kr_err = 0; \ - \ - __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \ - if (unlikely(__kr_err)) \ - goto err_label; \ -} while (0) + __put_user_nocheck(*((type *)(src)), (type *)(dst), err_label) static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) { @@ -398,12 +397,8 @@ static inline void user_access_restore(unsigned long enabled) { } * We want the unsafe accessors to always be inlined and use * the error labels - thus the macro games. */ -#define unsafe_put_user(x, ptr, label) do { \ - long __err = 0; \ - __put_user_nocheck(x, (ptr), __err); \ - if (__err) \ - goto label; \ -} while (0) +#define unsafe_put_user(x, ptr, label) \ + __put_user_nocheck(x, (ptr), label) #define unsafe_get_user(x, ptr, label) do { \ long __err = 0; \ From f6bff7827a48e59cff1ef98aae72452d65174e0c Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 10 Apr 2025 07:05:26 +0000 Subject: [PATCH 09/69] riscv: uaccess: use 'asm_goto_output' for get_user() With 'asm goto' we don't need to test the error etc, the exception just jumps to the error handling directly. Unlike put_user(), get_user() must work around GCC bugs [1] when using output clobbers in an asm goto statement. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 # 1 Signed-off-by: Jisheng Zhang [Cyril Bur: Rewritten commit message] Signed-off-by: Cyril Bur Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250410070526.3160847-6-cyrilbur@tenstorrent.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/uaccess.h | 95 +++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 27 deletions(-) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 719c9179a751..87d01168f80a 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -96,27 +96,58 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, unsigne * call. 
*/ -#define __get_user_asm(insn, x, ptr, err) \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define __get_user_asm(insn, x, ptr, label) \ + asm_goto_output( \ + "1:\n" \ + " " insn " %0, %1\n" \ + _ASM_EXTABLE_UACCESS_ERR(1b, %l2, %0) \ + : "=&r" (x) \ + : "m" (*(ptr)) : : label) +#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ +#define __get_user_asm(insn, x, ptr, label) \ do { \ - __typeof__(x) __x; \ + long __gua_err = 0; \ __asm__ __volatile__ ( \ "1:\n" \ " " insn " %1, %2\n" \ "2:\n" \ _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %0, %1) \ - : "+r" (err), "=&r" (__x) \ + : "+r" (__gua_err), "=&r" (x) \ : "m" (*(ptr))); \ - (x) = __x; \ + if (__gua_err) \ + goto label; \ } while (0) +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ #ifdef CONFIG_64BIT -#define __get_user_8(x, ptr, err) \ - __get_user_asm("ld", x, ptr, err) +#define __get_user_8(x, ptr, label) \ + __get_user_asm("ld", x, ptr, label) #else /* !CONFIG_64BIT */ -#define __get_user_8(x, ptr, err) \ + +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define __get_user_8(x, ptr, label) \ + u32 __user *__ptr = (u32 __user *)(ptr); \ + u32 __lo, __hi; \ + asm_goto_output( \ + "1:\n" \ + " lw %0, %2\n" \ + "2:\n" \ + " lw %1, %3\n" \ + _ASM_EXTABLE_UACCESS_ERR(1b, %l4, %0) \ + _ASM_EXTABLE_UACCESS_ERR(2b, %l4, %0) \ + : "=&r" (__lo), "=r" (__hi) \ + : "m" (__ptr[__LSW]), "m" (__ptr[__MSW]) \ + : : label); \ + (x) = (__typeof__(x))((__typeof__((x) - (x)))( \ + (((u64)__hi << 32) | __lo))); \ + +#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ +#define __get_user_8(x, ptr, label) \ do { \ u32 __user *__ptr = (u32 __user *)(ptr); \ u32 __lo, __hi; \ + long __gu8_err = 0; \ __asm__ __volatile__ ( \ "1:\n" \ " lw %1, %3\n" \ @@ -125,35 +156,51 @@ do { \ "3:\n" \ _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 3b, %0, %1) \ _ASM_EXTABLE_UACCESS_ERR_ZERO(2b, 3b, %0, %1) \ - : "+r" (err), "=&r" (__lo), "=r" (__hi) \ + : "+r" (__gu8_err), "=&r" (__lo), "=r" (__hi) \ : "m" (__ptr[__LSW]), "m" (__ptr[__MSW])); \ - if (err) \ + if (__gu8_err) { \ __hi = 0; \ - (x) = (__typeof__(x))((__typeof__((x)-(x)))( \ + goto label; \ + } \ + (x) = (__typeof__(x))((__typeof__((x) - (x)))( \ (((u64)__hi << 32) | __lo))); \ } while (0) +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + #endif /* CONFIG_64BIT */ -#define __get_user_nocheck(x, __gu_ptr, __gu_err) \ +#define __get_user_nocheck(x, __gu_ptr, label) \ do { \ switch (sizeof(*__gu_ptr)) { \ case 1: \ - __get_user_asm("lb", (x), __gu_ptr, __gu_err); \ + __get_user_asm("lb", (x), __gu_ptr, label); \ break; \ case 2: \ - __get_user_asm("lh", (x), __gu_ptr, __gu_err); \ + __get_user_asm("lh", (x), __gu_ptr, label); \ break; \ case 4: \ - __get_user_asm("lw", (x), __gu_ptr, __gu_err); \ + __get_user_asm("lw", (x), __gu_ptr, label); \ break; \ case 8: \ - __get_user_8((x), __gu_ptr, __gu_err); \ + __get_user_8((x), __gu_ptr, label); \ break; \ default: \ BUILD_BUG(); \ } \ } while (0) +#define __get_user_error(x, ptr, err) \ +do { \ + __label__ __gu_failed; \ + \ + __get_user_nocheck(x, ptr, __gu_failed); \ + err = 0; \ + break; \ +__gu_failed: \ + x = 0; \ + err = -EFAULT; \ +} while (0) + /** * __get_user: - Get a simple variable from user space, with less checking. * @x: Variable to store result. 
@@ -178,13 +225,16 @@
 ({ \
 	const __typeof__(*(ptr)) __user *__gu_ptr = untagged_addr(ptr); \
 	long __gu_err = 0; \
+	__typeof__(x) __gu_val; \
 \
 	__chk_user_ptr(__gu_ptr); \
 \
 	__enable_user_access(); \
-	__get_user_nocheck(x, __gu_ptr, __gu_err); \
+	__get_user_error(__gu_val, __gu_ptr, __gu_err); \
 	__disable_user_access(); \
 \
+	(x) = __gu_val; \
+	\
 	__gu_err; \
 })
@@ -369,13 +419,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
 }
 
 #define __get_kernel_nofault(dst, src, type, err_label) \
-do { \
-	long __kr_err = 0; \
-	\
-	__get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \
-	if (unlikely(__kr_err)) \
-		goto err_label; \
-} while (0)
+	__get_user_nocheck(*((type *)(dst)), (type *)(src), err_label)
 
 #define __put_kernel_nofault(dst, src, type, err_label) \
 	__put_user_nocheck(*((type *)(src)), (type *)(dst), err_label)
@@ -401,12 +445,9 @@ static inline void user_access_restore(unsigned long enabled) { }
 	__put_user_nocheck(x, (ptr), label)
 
 #define unsafe_get_user(x, ptr, label) do { \
-	long __err = 0; \
 	__inttype(*(ptr)) __gu_val; \
-	__get_user_nocheck(__gu_val, (ptr), __err); \
+	__get_user_nocheck(__gu_val, (ptr), label); \
 	(x) = (__force __typeof__(*(ptr)))__gu_val; \
-	if (__err) \
-		goto label; \
 } while (0)
 
 #define unsafe_copy_loop(dst, src, len, type, op, label) \
From 2940954c1ac527386e5203d4be8263d704491fbe Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Mon, 24 Feb 2025 19:20:40 +0800
Subject: [PATCH 10/69] riscv: vDSO: Remove --hash-style=both

When RISC-V was born, DT_GNU_HASH had already become the de facto
standard, so DT_HASH just wastes storage space. Remove the explicit
--hash-style=both setting and rely on the distro toolchain default,
which is most likely "gnu" (i.e. generating only DT_GNU_HASH, no
DT_HASH). This follows the logic of commit 48f6430505c0 ("arm64/vdso:
Remove --hash-style=sysv").

Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Link: https://lore.kernel.org/r/20250224112042.60282-2-xry111@xry111.site
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/kernel/vdso/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index 7575ef088adc..8d12f5646eb5 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -59,7 +59,7 @@ $(obj)/vdso.o: $(obj)/vdso.so
 $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
 	$(call if_changed,vdsold_and_check)
 LDFLAGS_vdso.so.dbg = -shared -soname=linux-vdso.so.1 \
-	--build-id=sha1 --hash-style=both --eh-frame-hdr
+	--build-id=sha1 --eh-frame-hdr
 
 # strip rule for the .so file
 $(obj)/%.so: OBJCOPYFLAGS := -S
From 0f733b5be9658b75496127e23e1f7edfc45bb423 Mon Sep 17 00:00:00 2001
From: Cyan Yang <cyan.yang@sifive.com>
Date: Fri, 18 Apr 2025 13:32:28 +0800
Subject: [PATCH 11/69] dt-bindings: riscv: Add xsfvqmaccdod and xsfvqmaccqoq
 ISA extension description

Add "xsfvqmaccdod" and "xsfvqmaccqoq" ISA extensions which are provided
by SiFive for int8 matrix multiplication instructions support.
Signed-off-by: Cyan Yang Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250418053239.4351-2-cyan.yang@sifive.com Signed-off-by: Palmer Dabbelt --- .../devicetree/bindings/riscv/extensions.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index bcab59e0cc2e..d36e7c68d69a 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -662,6 +662,19 @@ properties: Registers in the AX45MP datasheet. https://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf + # SiFive + - const: xsfvqmaccdod + description: + SiFive Int8 Matrix Multiplication Extensions Specification. + See more details in + https://www.sifive.com/document-file/sifive-int8-matrix-multiplication-extensions-specification + + - const: xsfvqmaccqoq + description: + SiFive Int8 Matrix Multiplication Extensions Specification. + See more details in + https://www.sifive.com/document-file/sifive-int8-matrix-multiplication-extensions-specification + # T-HEAD - const: xtheadvector description: From 2d147d77ae6e96c1c349a6ada0eac14111c3384a Mon Sep 17 00:00:00 2001 From: Cyan Yang Date: Fri, 18 Apr 2025 13:32:29 +0800 Subject: [PATCH 12/69] riscv: Add SiFive xsfvqmaccdod and xsfvqmaccqoq vendor extensions Add SiFive vendor extension support to the kernel with the target of "xsfvqmaccdod" and "xsfvqmaccqoq". Signed-off-by: Cyan Yang Link: https://lore.kernel.org/r/20250418053239.4351-3-cyan.yang@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig.vendor | 13 +++++++++++++ .../include/asm/vendor_extensions/sifive.h | 14 ++++++++++++++ arch/riscv/kernel/vendor_extensions.c | 10 ++++++++++ arch/riscv/kernel/vendor_extensions/Makefile | 1 + arch/riscv/kernel/vendor_extensions/sifive.c | 19 +++++++++++++++++++ 5 files changed, 57 insertions(+) create mode 100644 arch/riscv/include/asm/vendor_extensions/sifive.h create mode 100644 arch/riscv/kernel/vendor_extensions/sifive.c diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor index b096548fe0ff..e14f26368963 100644 --- a/arch/riscv/Kconfig.vendor +++ b/arch/riscv/Kconfig.vendor @@ -16,6 +16,19 @@ config RISCV_ISA_VENDOR_EXT_ANDES If you don't know what to do here, say Y. endmenu +menu "SiFive" +config RISCV_ISA_VENDOR_EXT_SIFIVE + bool "SiFive vendor extension support" + select RISCV_ISA_VENDOR_EXT + default y + help + Say N here if you want to disable all SiFive vendor extension + support. This will cause any SiFive vendor extensions that are + requested by hardware probing to be ignored. + + If you don't know what to do here, say Y. 
+endmenu + menu "T-Head" config RISCV_ISA_VENDOR_EXT_THEAD bool "T-Head vendor extension support" diff --git a/arch/riscv/include/asm/vendor_extensions/sifive.h b/arch/riscv/include/asm/vendor_extensions/sifive.h new file mode 100644 index 000000000000..608004250e2e --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/sifive.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_H +#define _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_H + +#include + +#include + +#define RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD 0 +#define RISCV_ISA_VENDOR_EXT_XSFVQMACCQOQ 1 + +extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_sifive; + +#endif diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c index 9feb7f67a0a3..92d8ff81f42c 100644 --- a/arch/riscv/kernel/vendor_extensions.c +++ b/arch/riscv/kernel/vendor_extensions.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -15,6 +16,9 @@ struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = { #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES &riscv_isa_vendor_ext_list_andes, #endif +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE + &riscv_isa_vendor_ext_list_sifive, +#endif #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD &riscv_isa_vendor_ext_list_thead, #endif @@ -45,6 +49,12 @@ bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsig cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap; break; #endif + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE + case SIFIVE_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_sifive.all_harts_isa_bitmap; + cpu_bmap = riscv_isa_vendor_ext_list_sifive.per_hart_isa_bitmap; + break; + #endif #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD case THEAD_VENDOR_ID: bmap = &riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap; diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile index 866414c81a9f..d5fdde0e863b 100644 --- a/arch/riscv/kernel/vendor_extensions/Makefile +++ b/arch/riscv/kernel/vendor_extensions/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE) += sifive.o obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead.o obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead_hwprobe.o diff --git a/arch/riscv/kernel/vendor_extensions/sifive.c b/arch/riscv/kernel/vendor_extensions/sifive.c new file mode 100644 index 000000000000..990ac83b1f81 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/sifive.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include + +#include +#include + +/* All SiFive vendor extensions supported in Linux */ +const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = { + __RISCV_ISA_EXT_DATA(xsfvqmaccdod, RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD), + __RISCV_ISA_EXT_DATA(xsfvqmaccqoq, RISCV_ISA_VENDOR_EXT_XSFVQMACCQOQ), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_sifive = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_sifive), + .ext_data = riscv_isa_vendor_ext_sifive, +}; From e8fd215ed0eb814486d50b4835007cbc50b2c2b7 Mon Sep 17 00:00:00 2001 From: Cyan Yang Date: Fri, 18 Apr 2025 13:32:30 +0800 Subject: [PATCH 13/69] riscv: hwprobe: Document SiFive xsfvqmaccdod and xsfvqmaccqoq vendor extensions Document the support for sifive vendor extensions using the key RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0 and two vendor extensions for SiFive Int8 Matrix 
Multiplication Instructions using RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCDOD
and RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCQOQ.

Signed-off-by: Cyan Yang <cyan.yang@sifive.com>
Link: https://lore.kernel.org/r/20250418053239.4351-4-cyan.yang@sifive.com
Signed-off-by: Palmer Dabbelt
---
 Documentation/arch/riscv/hwprobe.rst  | 14 ++++++++++++++
 arch/riscv/include/asm/hwprobe.h      |  2 +-
 arch/riscv/include/uapi/asm/hwprobe.h |  1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst
index 53607d962653..16085b2ee64e 100644
--- a/Documentation/arch/riscv/hwprobe.rst
+++ b/Documentation/arch/riscv/hwprobe.rst
@@ -335,3 +335,17 @@ The following keys are defined:
 
 * :c:macro:`RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE`: An unsigned int which
   represents the size of the Zicbom block in bytes.
+
+* :c:macro:`RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0`: A bitmask containing the
+  sifive vendor extensions that are compatible with the
+  :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: base system behavior.
+
+  * SIFIVE
+
+    * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCDOD`: The Xsfvqmaccdod vendor
+      extension is supported in version 1.1 of SiFive Int8 Matrix Multiplication
+      Extensions Specification.
+
+    * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCQOQ`: The Xsfvqmaccqoq vendor
+      extension is supported in version 1.1 of SiFive Int8 Matrix Multiplication
+      Instruction Extensions Specification.
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 1f690fea0e03..1c6977305776 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -8,7 +8,7 @@
 
 #include <uapi/asm/hwprobe.h>
 
-#define RISCV_HWPROBE_MAX_KEY 12
+#define RISCV_HWPROBE_MAX_KEY 13
 
 static inline bool riscv_hwprobe_key_is_valid(__s64 key)
 {
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index 3c2fce939673..9c70101f021b 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -104,6 +104,7 @@ struct riscv_hwprobe {
 #define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED	4
 #define RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0	11
 #define RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE	12
+#define RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0	13
 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
 
 /* Flags */
From 1a6274f035346e76835d46096136dd3e6cca9575 Mon Sep 17 00:00:00 2001
From: Cyan Yang <cyan.yang@sifive.com>
Date: Fri, 18 Apr 2025 13:32:31 +0800
Subject: [PATCH 14/69] riscv: hwprobe: Add SiFive vendor extension support
 and probe for xsfvqmaccdod and xsfvqmaccqoq

Add a new hwprobe key "RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0" which
allows userspace to probe for the new vendor extensions from SiFive.
Also, add new hwprobe for SiFive "xsfvqmaccdod" and "xsfvqmaccqoq"
vendor extensions.
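For reference, a hedged userspace sketch of consuming the new key (assumes the uapi headers from this series are installed; error handling trimmed):

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <asm/hwprobe.h>
	#include <asm/vendor/sifive.h>

	int main(void)
	{
		struct riscv_hwprobe pair = {
			.key = RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0,
		};

		/* one pair, all online cpus (cpusetsize 0, cpus NULL), no flags */
		if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
			return 1;

		if (pair.value & RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCDOD)
			printf("xsfvqmaccdod supported\n");
		if (pair.value & RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCQOQ)
			printf("xsfvqmaccqoq supported\n");
		return 0;
	}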
Signed-off-by: Cyan Yang Link: https://lore.kernel.org/r/20250418053239.4351-5-cyan.yang@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hwprobe.h | 1 + .../asm/vendor_extensions/sifive_hwprobe.h | 19 ++++++++++++++++++ arch/riscv/include/uapi/asm/vendor/sifive.h | 4 ++++ arch/riscv/kernel/sys_hwprobe.c | 5 +++++ arch/riscv/kernel/vendor_extensions/Makefile | 1 + .../kernel/vendor_extensions/sifive_hwprobe.c | 20 +++++++++++++++++++ 6 files changed, 50 insertions(+) create mode 100644 arch/riscv/include/asm/vendor_extensions/sifive_hwprobe.h create mode 100644 arch/riscv/include/uapi/asm/vendor/sifive.h create mode 100644 arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 1c6977305776..7fe0a379474a 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -22,6 +22,7 @@ static inline bool hwprobe_key_is_bitmask(__s64 key) case RISCV_HWPROBE_KEY_IMA_EXT_0: case RISCV_HWPROBE_KEY_CPUPERF_0: case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0: + case RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0: return true; } diff --git a/arch/riscv/include/asm/vendor_extensions/sifive_hwprobe.h b/arch/riscv/include/asm/vendor_extensions/sifive_hwprobe.h new file mode 100644 index 000000000000..90a61abd033c --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/sifive_hwprobe.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_HWPROBE_H +#define _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_HWPROBE_H + +#include + +#include + +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE +void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair, const struct cpumask *cpus); +#else +static inline void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair, + const struct cpumask *cpus) +{ + pair->value = 0; +} +#endif + +#endif diff --git a/arch/riscv/include/uapi/asm/vendor/sifive.h b/arch/riscv/include/uapi/asm/vendor/sifive.h new file mode 100644 index 000000000000..f25d8cf110d1 --- /dev/null +++ b/arch/riscv/include/uapi/asm/vendor/sifive.h @@ -0,0 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#define RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCDOD (1 << 0) +#define RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCQOQ (1 << 1) diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 249aec8594a9..138e74f05de7 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -300,6 +301,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, pair->value = riscv_timebase; break; + case RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0: + hwprobe_isa_vendor_ext_sifive_0(pair, cpus); + break; + case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0: hwprobe_isa_vendor_ext_thead_0(pair, cpus); break; diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile index d5fdde0e863b..a4eca96d1c8a 100644 --- a/arch/riscv/kernel/vendor_extensions/Makefile +++ b/arch/riscv/kernel/vendor_extensions/Makefile @@ -2,5 +2,6 @@ obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE) += sifive.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE) += sifive_hwprobe.o obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead.o obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead_hwprobe.o diff --git a/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c 
b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c new file mode 100644 index 000000000000..461ce0f305ce --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include + +#include +#include + +#include +#include + +void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair, const struct cpumask *cpus) +{ + VENDOR_EXTENSION_SUPPORTED(pair, cpus, + riscv_isa_vendor_ext_list_sifive.per_hart_isa_bitmap, { + VENDOR_EXT_KEY(XSFVQMACCDOD); + VENDOR_EXT_KEY(XSFVQMACCQOQ); + }); +} From a5a15e07cbb900b59fbdb927189d24d1d01ad2e7 Mon Sep 17 00:00:00 2001 From: Cyan Yang Date: Fri, 18 Apr 2025 13:32:32 +0800 Subject: [PATCH 15/69] dt-bindings: riscv: Add xsfvfnrclipxfqf ISA extension description Add "xsfvfnrclipxfqf" ISA extension which is provided by SiFive for FP32-to-int8 ranged clip instructions support. Signed-off-by: Cyan Yang Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250418053239.4351-6-cyan.yang@sifive.com Signed-off-by: Palmer Dabbelt --- Documentation/devicetree/bindings/riscv/extensions.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index d36e7c68d69a..be203df29eb8 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -675,6 +675,12 @@ properties: See more details in https://www.sifive.com/document-file/sifive-int8-matrix-multiplication-extensions-specification + - const: xsfvfnrclipxfqf + description: + SiFive FP32-to-int8 Ranged Clip Instructions Extensions Specification. + See more details in + https://www.sifive.com/document-file/fp32-to-int8-ranged-clip-instructions + # T-HEAD - const: xtheadvector description: From e84fffe21b7498ff50aed3a96773993d04cfaed0 Mon Sep 17 00:00:00 2001 From: Cyan Yang Date: Fri, 18 Apr 2025 13:32:33 +0800 Subject: [PATCH 16/69] riscv: Add SiFive xsfvfnrclipxfqf vendor extension Add SiFive vendor extension "xsfvfnrclipxfqf" support to the kernel. 
Signed-off-by: Cyan Yang Link: https://lore.kernel.org/r/20250418053239.4351-7-cyan.yang@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vendor_extensions/sifive.h | 1 + arch/riscv/kernel/vendor_extensions/sifive.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/riscv/include/asm/vendor_extensions/sifive.h b/arch/riscv/include/asm/vendor_extensions/sifive.h index 608004250e2e..2d05e3e73170 100644 --- a/arch/riscv/include/asm/vendor_extensions/sifive.h +++ b/arch/riscv/include/asm/vendor_extensions/sifive.h @@ -8,6 +8,7 @@ #define RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD 0 #define RISCV_ISA_VENDOR_EXT_XSFVQMACCQOQ 1 +#define RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF 2 extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_sifive; diff --git a/arch/riscv/kernel/vendor_extensions/sifive.c b/arch/riscv/kernel/vendor_extensions/sifive.c index 990ac83b1f81..077315e5b2d7 100644 --- a/arch/riscv/kernel/vendor_extensions/sifive.c +++ b/arch/riscv/kernel/vendor_extensions/sifive.c @@ -9,6 +9,7 @@ /* All SiFive vendor extensions supported in Linux */ const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = { + __RISCV_ISA_EXT_DATA(xsfvfnrclipxfqf, RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF), __RISCV_ISA_EXT_DATA(xsfvqmaccdod, RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD), __RISCV_ISA_EXT_DATA(xsfvqmaccqoq, RISCV_ISA_VENDOR_EXT_XSFVQMACCQOQ), }; From 659d664f7df8e5c094e260434bebd0efaa547e49 Mon Sep 17 00:00:00 2001 From: Cyan Yang Date: Fri, 18 Apr 2025 13:32:34 +0800 Subject: [PATCH 17/69] riscv: hwprobe: Document SiFive xsfvfnrclipxfqf vendor extension Document the support for SiFive vendor extensions for FP32-to-int8 Ranged Clip Instructions using RISCV_HWPROBE_VENDOR_EXT_XSFVFNRCLIPXFQF. Signed-off-by: Cyan Yang Link: https://lore.kernel.org/r/20250418053239.4351-8-cyan.yang@sifive.com Signed-off-by: Palmer Dabbelt --- Documentation/arch/riscv/hwprobe.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 16085b2ee64e..e15405e12239 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -349,3 +349,7 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCQOQ`: The Xsfqmaccqoq vendor extension is supported in version 1.1 of SiFive Int8 Matrix Multiplication Instruction Extensions Specification. + + * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVFNRCLIPXFQF`: The Xsfvfnrclipxfqf + vendor extension is supported in version 1.0 of SiFive FP32-to-int8 Ranged + Clip Instructions Extensions Specification. From 1d91224394c92245942c402245370c4abb0fcbfb Mon Sep 17 00:00:00 2001 From: Cyan Yang Date: Fri, 18 Apr 2025 13:32:35 +0800 Subject: [PATCH 18/69] riscv: hwprobe: Add SiFive xsfvfnrclipxfqf vendor extension Add hwprobe for SiFive "xsfvfnrclipxfqf" vendor extension. 
Signed-off-by: Cyan Yang Link: https://lore.kernel.org/r/20250418053239.4351-9-cyan.yang@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/vendor/sifive.h | 1 + arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/riscv/include/uapi/asm/vendor/sifive.h b/arch/riscv/include/uapi/asm/vendor/sifive.h index f25d8cf110d1..b772d4631284 100644 --- a/arch/riscv/include/uapi/asm/vendor/sifive.h +++ b/arch/riscv/include/uapi/asm/vendor/sifive.h @@ -2,3 +2,4 @@ #define RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCDOD (1 << 0) #define RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCQOQ (1 << 1) +#define RISCV_HWPROBE_VENDOR_EXT_XSFVFNRCLIPXFQF (1 << 2) diff --git a/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c index 461ce0f305ce..2b9505079a9f 100644 --- a/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c +++ b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c @@ -16,5 +16,6 @@ void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair, const struct cp riscv_isa_vendor_ext_list_sifive.per_hart_isa_bitmap, { VENDOR_EXT_KEY(XSFVQMACCDOD); VENDOR_EXT_KEY(XSFVQMACCQOQ); + VENDOR_EXT_KEY(XSFVFNRCLIPXFQF); }); } From d5ca02b25f5dbe44a25afe35cd75d49f1f0b9763 Mon Sep 17 00:00:00 2001 From: Cyan Yang Date: Fri, 18 Apr 2025 13:32:36 +0800 Subject: [PATCH 19/69] dt-bindings: riscv: Add xsfvfwmaccqqq ISA extension description Add "xsfvfwmaccqqq" ISA extension which is provided by SiFive for matrix multiply accumulate instructions support. Signed-off-by: Cyan Yang Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250418053239.4351-10-cyan.yang@sifive.com Signed-off-by: Palmer Dabbelt --- Documentation/devicetree/bindings/riscv/extensions.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index be203df29eb8..ede6a58ccf53 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -681,6 +681,12 @@ properties: See more details in https://www.sifive.com/document-file/fp32-to-int8-ranged-clip-instructions + - const: xsfvfwmaccqqq + description: + SiFive Matrix Multiply Accumulate Instruction Extensions Specification. + See more details in + https://www.sifive.com/document-file/matrix-multiply-accumulate-instruction + # T-HEAD - const: xtheadvector description: From 34e9b16b4b888988730ffab9a9039cfcf305942e Mon Sep 17 00:00:00 2001 From: Cyan Yang Date: Fri, 18 Apr 2025 13:32:37 +0800 Subject: [PATCH 20/69] riscv: Add SiFive xsfvfwmaccqqq vendor extension Add SiFive vendor extension "xsfvfwmaccqqq" support to the kernel. 
Signed-off-by: Cyan Yang Link: https://lore.kernel.org/r/20250418053239.4351-11-cyan.yang@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vendor_extensions/sifive.h | 1 + arch/riscv/kernel/vendor_extensions/sifive.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/riscv/include/asm/vendor_extensions/sifive.h b/arch/riscv/include/asm/vendor_extensions/sifive.h index 2d05e3e73170..ac00e500361c 100644 --- a/arch/riscv/include/asm/vendor_extensions/sifive.h +++ b/arch/riscv/include/asm/vendor_extensions/sifive.h @@ -9,6 +9,7 @@ #define RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD 0 #define RISCV_ISA_VENDOR_EXT_XSFVQMACCQOQ 1 #define RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF 2 +#define RISCV_ISA_VENDOR_EXT_XSFVFWMACCQQQ 3 extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_sifive; diff --git a/arch/riscv/kernel/vendor_extensions/sifive.c b/arch/riscv/kernel/vendor_extensions/sifive.c index 077315e5b2d7..1411337dc1e6 100644 --- a/arch/riscv/kernel/vendor_extensions/sifive.c +++ b/arch/riscv/kernel/vendor_extensions/sifive.c @@ -10,6 +10,7 @@ /* All SiFive vendor extensions supported in Linux */ const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = { __RISCV_ISA_EXT_DATA(xsfvfnrclipxfqf, RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF), + __RISCV_ISA_EXT_DATA(xsfvfwmaccqqq, RISCV_ISA_VENDOR_EXT_XSFVFWMACCQQQ), __RISCV_ISA_EXT_DATA(xsfvqmaccdod, RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD), __RISCV_ISA_EXT_DATA(xsfvqmaccqoq, RISCV_ISA_VENDOR_EXT_XSFVQMACCQOQ), }; From a3ca43dc527159aa6f55058a9fa506fa720d6514 Mon Sep 17 00:00:00 2001 From: Cyan Yang Date: Fri, 18 Apr 2025 13:32:38 +0800 Subject: [PATCH 21/69] riscv: hwprobe: Document SiFive xsfvfwmaccqqq vendor extension Document the support for matrix multiply accumulate instruction from SiFive using RISCV_HWPROBE_VENDOR_EXT_XSFVFWMACCQQQ. Signed-off-by: Cyan Yang Link: https://lore.kernel.org/r/20250418053239.4351-12-cyan.yang@sifive.com Signed-off-by: Palmer Dabbelt --- Documentation/arch/riscv/hwprobe.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index e15405e12239..7c11351b1383 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -353,3 +353,7 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVFNRCLIPXFQF`: The Xsfvfnrclipxfqf vendor extension is supported in version 1.0 of SiFive FP32-to-int8 Ranged Clip Instructions Extensions Specification. + + * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVFWMACCQQQ`: The Xsfvfwmaccqqq + vendor extension is supported in version 1.0 of Matrix Multiply Accumulate + Instruction Extensions Specification. \ No newline at end of file From d9669e33c8fadb5f81287f4961f01e40c0a11c23 Mon Sep 17 00:00:00 2001 From: Cyan Yang Date: Fri, 18 Apr 2025 13:32:39 +0800 Subject: [PATCH 22/69] riscv: hwprobe: Add SiFive xsfvfwmaccqqq vendor extension Add hwprobe for SiFive "xsfvfwmaccqqq" vendor extension. 
Signed-off-by: Cyan Yang Link: https://lore.kernel.org/r/20250418053239.4351-13-cyan.yang@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/vendor/sifive.h | 1 + arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/riscv/include/uapi/asm/vendor/sifive.h b/arch/riscv/include/uapi/asm/vendor/sifive.h index b772d4631284..9f3278a4b298 100644 --- a/arch/riscv/include/uapi/asm/vendor/sifive.h +++ b/arch/riscv/include/uapi/asm/vendor/sifive.h @@ -3,3 +3,4 @@ #define RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCDOD (1 << 0) #define RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCQOQ (1 << 1) #define RISCV_HWPROBE_VENDOR_EXT_XSFVFNRCLIPXFQF (1 << 2) +#define RISCV_HWPROBE_VENDOR_EXT_XSFVFWMACCQQQ (1 << 3) diff --git a/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c index 2b9505079a9f..1f77f6309763 100644 --- a/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c +++ b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c @@ -17,5 +17,6 @@ void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair, const struct cp VENDOR_EXT_KEY(XSFVQMACCDOD); VENDOR_EXT_KEY(XSFVQMACCQOQ); VENDOR_EXT_KEY(XSFVFNRCLIPXFQF); + VENDOR_EXT_KEY(XSFVFWMACCQQQ); }); } From 2f956db8b3b02256b21da4d1f26fedc63782adff Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Mon, 2 Jun 2025 10:33:15 -0700 Subject: [PATCH 23/69] Revert "RISC-V: vDSO: Wire up getrandom() vDSO implementation" This has been on -next for a bit, but it's broken and there's already a v2. So I'm reverting it to avoid more rebasing. This reverts commit 89079520cef65d6da1e864eab4464effe5396e23. Link: https://lore.kernel.org/r/20250602173315.20228-1-palmer@dabbelt.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 - arch/riscv/include/asm/vdso/getrandom.h | 30 --- arch/riscv/kernel/vdso/Makefile | 12 - arch/riscv/kernel/vdso/getrandom.c | 10 - arch/riscv/kernel/vdso/vdso.lds.S | 1 - arch/riscv/kernel/vdso/vgetrandom-chacha.S | 244 ------------------ .../selftests/vDSO/vgetrandom-chacha.S | 2 - 7 files changed, 300 deletions(-) delete mode 100644 arch/riscv/include/asm/vdso/getrandom.h delete mode 100644 arch/riscv/kernel/vdso/getrandom.c delete mode 100644 arch/riscv/kernel/vdso/vgetrandom-chacha.S diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index fbca724302ab..bbec87b79309 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -218,7 +218,6 @@ config RISCV select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT select UACCESS_MEMCPY if !MMU - select VDSO_GETRANDOM if HAVE_GENERIC_VDSO select USER_STACKTRACE_SUPPORT select ZONE_DMA32 if 64BIT diff --git a/arch/riscv/include/asm/vdso/getrandom.h b/arch/riscv/include/asm/vdso/getrandom.h deleted file mode 100644 index 8dc92441702a..000000000000 --- a/arch/riscv/include/asm/vdso/getrandom.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2025 Xi Ruoyao . All Rights Reserved. 
- */ -#ifndef __ASM_VDSO_GETRANDOM_H -#define __ASM_VDSO_GETRANDOM_H - -#ifndef __ASSEMBLY__ - -#include - -static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags) -{ - register long ret asm("a0"); - register long nr asm("a7") = __NR_getrandom; - register void *buffer asm("a0") = _buffer; - register size_t len asm("a1") = _len; - register unsigned int flags asm("a2") = _flags; - - asm volatile ("ecall\n" - : "+r" (ret) - : "r" (nr), "r" (buffer), "r" (len), "r" (flags) - : "memory"); - - return ret; -} - -#endif /* !__ASSEMBLY__ */ - -#endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 8d12f5646eb5..4a5d131506fc 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -13,17 +13,9 @@ vdso-syms += flush_icache vdso-syms += hwprobe vdso-syms += sys_hwprobe -ifdef CONFIG_VDSO_GETRANDOM -vdso-syms += getrandom -endif - # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o -ifdef CONFIG_VDSO_GETRANDOM -obj-vdso += vgetrandom-chacha.o -endif - ccflags-y := -fno-stack-protector ccflags-y += -DDISABLE_BRANCH_PROFILING ccflags-y += -fno-builtin @@ -32,10 +24,6 @@ ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) endif -ifneq ($(c-getrandom-y),) - CFLAGS_getrandom.o += -fPIC -include $(c-getrandom-y) -endif - CFLAGS_hwprobe.o += -fPIC # Build rules diff --git a/arch/riscv/kernel/vdso/getrandom.c b/arch/riscv/kernel/vdso/getrandom.c deleted file mode 100644 index f21922e8cebd..000000000000 --- a/arch/riscv/kernel/vdso/getrandom.c +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2025 Xi Ruoyao . All Rights Reserved. - */ -#include - -ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) -{ - return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); -} diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index abc69cda0445..8e86965a8aae 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -80,7 +80,6 @@ VERSION #ifndef COMPAT_VDSO __vdso_riscv_hwprobe; #endif - __vdso_getrandom; local: *; }; } diff --git a/arch/riscv/kernel/vdso/vgetrandom-chacha.S b/arch/riscv/kernel/vdso/vgetrandom-chacha.S deleted file mode 100644 index d793cadc78a6..000000000000 --- a/arch/riscv/kernel/vdso/vgetrandom-chacha.S +++ /dev/null @@ -1,244 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2025 Xi Ruoyao . All Rights Reserved. - * - * Based on arch/loongarch/vdso/vgetrandom-chacha.S. 
- */ - -#include -#include - -.text - -.macro ROTRI rd rs imm - slliw t0, \rs, 32 - \imm - srliw \rd, \rs, \imm - or \rd, \rd, t0 -.endm - -.macro OP_4REG op d0 d1 d2 d3 s0 s1 s2 s3 - \op \d0, \d0, \s0 - \op \d1, \d1, \s1 - \op \d2, \d2, \s2 - \op \d3, \d3, \s3 -.endm - -/* - * a0: output bytes - * a1: 32-byte key input - * a2: 8-byte counter input/output - * a3: number of 64-byte blocks to write to output - */ -SYM_FUNC_START(__arch_chacha20_blocks_nostack) - -#define output a0 -#define key a1 -#define counter a2 -#define nblocks a3 -#define i a4 -#define state0 s0 -#define state1 s1 -#define state2 s2 -#define state3 s3 -#define state4 s4 -#define state5 s5 -#define state6 s6 -#define state7 s7 -#define state8 s8 -#define state9 s9 -#define state10 s10 -#define state11 s11 -#define state12 a5 -#define state13 a6 -#define state14 a7 -#define state15 t1 -#define cnt t2 -#define copy0 t3 -#define copy1 t4 -#define copy2 t5 -#define copy3 t6 - -/* Packs to be used with OP_4REG */ -#define line0 state0, state1, state2, state3 -#define line1 state4, state5, state6, state7 -#define line2 state8, state9, state10, state11 -#define line3 state12, state13, state14, state15 - -#define line1_perm state5, state6, state7, state4 -#define line2_perm state10, state11, state8, state9 -#define line3_perm state15, state12, state13, state14 - -#define copy copy0, copy1, copy2, copy3 - -#define _16 16, 16, 16, 16 -#define _20 20, 20, 20, 20 -#define _24 24, 24, 24, 24 -#define _25 25, 25, 25, 25 - - addi sp, sp, -12*SZREG - REG_S s0, (sp) - REG_S s1, SZREG(sp) - REG_S s2, 2*SZREG(sp) - REG_S s3, 3*SZREG(sp) - REG_S s4, 4*SZREG(sp) - REG_S s5, 5*SZREG(sp) - REG_S s6, 6*SZREG(sp) - REG_S s7, 7*SZREG(sp) - REG_S s8, 8*SZREG(sp) - REG_S s9, 9*SZREG(sp) - REG_S s10, 10*SZREG(sp) - REG_S s11, 11*SZREG(sp) - - ld cnt, (counter) - - li copy0, 0x61707865 - li copy1, 0x3320646e - li copy2, 0x79622d32 - li copy3, 0x6b206574 - -.Lblock: - /* state[0,1,2,3] = "expand 32-byte k" */ - mv state0, copy0 - mv state1, copy1 - mv state2, copy2 - mv state3, copy3 - - /* state[4,5,..,11] = key */ - lw state4, (key) - lw state5, 4(key) - lw state6, 8(key) - lw state7, 12(key) - lw state8, 16(key) - lw state9, 20(key) - lw state10, 24(key) - lw state11, 28(key) - - /* state[12,13] = counter */ - mv state12, cnt - srli state13, cnt, 32 - - /* state[14,15] = 0 */ - mv state14, zero - mv state15, zero - - li i, 10 -.Lpermute: - /* odd round */ - OP_4REG addw line0, line1 - OP_4REG xor line3, line0 - OP_4REG ROTRI line3, _16 - - OP_4REG addw line2, line3 - OP_4REG xor line1, line2 - OP_4REG ROTRI line1, _20 - - OP_4REG addw line0, line1 - OP_4REG xor line3, line0 - OP_4REG ROTRI line3, _24 - - OP_4REG addw line2, line3 - OP_4REG xor line1, line2 - OP_4REG ROTRI line1, _25 - - /* even round */ - OP_4REG addw line0, line1_perm - OP_4REG xor line3_perm, line0 - OP_4REG ROTRI line3_perm, _16 - - OP_4REG addw line2_perm, line3_perm - OP_4REG xor line1_perm, line2_perm - OP_4REG ROTRI line1_perm, _20 - - OP_4REG addw line0, line1_perm - OP_4REG xor line3_perm, line0 - OP_4REG ROTRI line3_perm, _24 - - OP_4REG addw line2_perm, line3_perm - OP_4REG xor line1_perm, line2_perm - OP_4REG ROTRI line1_perm, _25 - - addi i, i, -1 - bnez i, .Lpermute - - /* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */ - OP_4REG addw line0, copy - sw state0, (output) - sw state1, 4(output) - sw state2, 8(output) - sw state3, 12(output) - - /* from now on state[0,1,2,3] are scratch registers */ - - /* state[0,1,2,3] = lo(key) */ - lw state0, (key) - lw state1, 
4(key) - lw state2, 8(key) - lw state3, 12(key) - - /* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */ - OP_4REG addw line1, line0 - sw state4, 16(output) - sw state5, 20(output) - sw state6, 24(output) - sw state7, 28(output) - - /* state[0,1,2,3] = hi(key) */ - lw state0, 16(key) - lw state1, 20(key) - lw state2, 24(key) - lw state3, 28(key) - - /* output[8,9,10,11] = tmp[0,1,2,3] + state[8,9,10,11] */ - OP_4REG addw line2, line0 - sw state8, 32(output) - sw state9, 36(output) - sw state10, 40(output) - sw state11, 44(output) - - /* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */ - addw state12, state12, cnt - srli state0, cnt, 32 - addw state13, state13, state0 - sw state12, 48(output) - sw state13, 52(output) - sw state14, 56(output) - sw state15, 60(output) - - /* ++counter */ - addi cnt, cnt, 1 - - /* output += 64 */ - addi output, output, 64 - /* --nblocks */ - addi nblocks, nblocks, -1 - bnez nblocks, .Lblock - - /* counter = [cnt_lo, cnt_hi] */ - sd cnt, (counter) - - /* Zero out the potentially sensitive regs, in case nothing uses these - * again. As at now copy[0,1,2,3] just contains "expand 32-byte k" and - * state[0,...,11] are s0-s11 those we'll restore in the epilogue, we - * only need to zero state[12,...,15]. - */ - mv state12, zero - mv state13, zero - mv state14, zero - mv state15, zero - - REG_L s0, (sp) - REG_L s1, SZREG(sp) - REG_L s2, 2*SZREG(sp) - REG_L s3, 3*SZREG(sp) - REG_L s4, 4*SZREG(sp) - REG_L s5, 5*SZREG(sp) - REG_L s6, 6*SZREG(sp) - REG_L s7, 7*SZREG(sp) - REG_L s8, 8*SZREG(sp) - REG_L s9, 9*SZREG(sp) - REG_L s10, 10*SZREG(sp) - REG_L s11, 11*SZREG(sp) - addi sp, sp, 12*SZREG - - ret -SYM_FUNC_END(__arch_chacha20_blocks_nostack) diff --git a/tools/testing/selftests/vDSO/vgetrandom-chacha.S b/tools/testing/selftests/vDSO/vgetrandom-chacha.S index a4a82e1c28a9..d6e09af7c0a9 100644 --- a/tools/testing/selftests/vDSO/vgetrandom-chacha.S +++ b/tools/testing/selftests/vDSO/vgetrandom-chacha.S @@ -11,8 +11,6 @@ #include "../../../../arch/loongarch/vdso/vgetrandom-chacha.S" #elif defined(__powerpc__) || defined(__powerpc64__) #include "../../../../arch/powerpc/kernel/vdso/vgetrandom-chacha.S" -#elif defined(__riscv) && __riscv_xlen == 64 -#include "../../../../arch/riscv/kernel/vdso/vgetrandom-chacha.S" #elif defined(__s390x__) #include "../../../../arch/s390/kernel/vdso64/vgetrandom-chacha.S" #elif defined(__x86_64__) From cf8651f7319d12a6fd81e1d001afb0958ee2bf28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 23 May 2025 12:19:18 +0200 Subject: [PATCH 24/69] riscv: sbi: add Firmware Feature (FWFT) SBI extensions definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Firmware Features extension (FWFT) was added as part of the SBI 3.0 specification. Add SBI definitions to use this extension. 
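To make the new definitions concrete: the proper in-kernel wrapper arrives later in this series, but the raw call shape would look roughly like this sketch (not part of this patch):

  struct sbiret ret;

  /* Sketch: ask the firmware to delegate misaligned-access exceptions
   * to S-mode and lock the setting against further changes. */
  ret = sbi_ecall(SBI_EXT_FWFT, SBI_EXT_FWFT_SET,
                  SBI_FWFT_MISALIGNED_EXC_DELEG, 1,
                  SBI_FWFT_SET_FLAG_LOCK, 0, 0, 0);
  if (ret.error)
          /* e.g. SBI_ERR_DENIED_LOCKED if a previous set was locked */
          return sbi_err_map_linux_errno(ret.error);
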
Signed-off-by: Clément Léger Reviewed-by: Samuel Holland Tested-by: Samuel Holland Reviewed-by: Deepak Gupta Reviewed-by: Andrew Jones Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20250523101932.1594077-2-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/sbi.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 3d250824178b..bb077d0c912f 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -35,6 +35,7 @@ enum sbi_ext_id { SBI_EXT_DBCN = 0x4442434E, SBI_EXT_STA = 0x535441, SBI_EXT_NACL = 0x4E41434C, + SBI_EXT_FWFT = 0x46574654, /* Experimentals extensions must lie within this range */ SBI_EXT_EXPERIMENTAL_START = 0x08000000, @@ -402,6 +403,33 @@ enum sbi_ext_nacl_feature { #define SBI_NACL_SHMEM_SRET_X(__i) ((__riscv_xlen / 8) * (__i)) #define SBI_NACL_SHMEM_SRET_X_LAST 31 +/* SBI function IDs for FW feature extension */ +#define SBI_EXT_FWFT_SET 0x0 +#define SBI_EXT_FWFT_GET 0x1 + +enum sbi_fwft_feature_t { + SBI_FWFT_MISALIGNED_EXC_DELEG = 0x0, + SBI_FWFT_LANDING_PAD = 0x1, + SBI_FWFT_SHADOW_STACK = 0x2, + SBI_FWFT_DOUBLE_TRAP = 0x3, + SBI_FWFT_PTE_AD_HW_UPDATING = 0x4, + SBI_FWFT_POINTER_MASKING_PMLEN = 0x5, + SBI_FWFT_LOCAL_RESERVED_START = 0x6, + SBI_FWFT_LOCAL_RESERVED_END = 0x3fffffff, + SBI_FWFT_LOCAL_PLATFORM_START = 0x40000000, + SBI_FWFT_LOCAL_PLATFORM_END = 0x7fffffff, + + SBI_FWFT_GLOBAL_RESERVED_START = 0x80000000, + SBI_FWFT_GLOBAL_RESERVED_END = 0xbfffffff, + SBI_FWFT_GLOBAL_PLATFORM_START = 0xc0000000, + SBI_FWFT_GLOBAL_PLATFORM_END = 0xffffffff, +}; + +#define SBI_FWFT_PLATFORM_FEATURE_BIT BIT(30) +#define SBI_FWFT_GLOBAL_FEATURE_BIT BIT(31) + +#define SBI_FWFT_SET_FLAG_LOCK BIT(0) + /* SBI spec version fields */ #define SBI_SPEC_VERSION_DEFAULT 0x1 #define SBI_SPEC_VERSION_MAJOR_SHIFT 24 @@ -419,6 +447,11 @@ enum sbi_ext_nacl_feature { #define SBI_ERR_ALREADY_STARTED -7 #define SBI_ERR_ALREADY_STOPPED -8 #define SBI_ERR_NO_SHMEM -9 +#define SBI_ERR_INVALID_STATE -10 +#define SBI_ERR_BAD_RANGE -11 +#define SBI_ERR_TIMEOUT -12 +#define SBI_ERR_IO -13 +#define SBI_ERR_DENIED_LOCKED -14 extern unsigned long sbi_spec_version; struct sbiret { From a7cd450f0e06b4118b2ca8b4e1ef707d5c2c4506 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 23 May 2025 12:19:19 +0200 Subject: [PATCH 25/69] riscv: sbi: remove useless parentheses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few parentheses in the checks for the SBI version/extension were useless; remove them.
Signed-off-by: Clément Léger Reviewed-by: Andrew Jones Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20250523101932.1594077-3-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sbi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index 1989b8cade1b..1d44c35305a9 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -609,7 +609,7 @@ void __init sbi_init(void) } else { __sbi_rfence = __sbi_rfence_v01; } - if ((sbi_spec_version >= sbi_mk_version(0, 3)) && + if (sbi_spec_version >= sbi_mk_version(0, 3) && sbi_probe_extension(SBI_EXT_SRST)) { pr_info("SBI SRST extension detected\n"); pm_power_off = sbi_srst_power_off; @@ -617,8 +617,8 @@ void __init sbi_init(void) sbi_srst_reboot_nb.priority = 192; register_restart_handler(&sbi_srst_reboot_nb); } - if ((sbi_spec_version >= sbi_mk_version(2, 0)) && - (sbi_probe_extension(SBI_EXT_DBCN) > 0)) { + if (sbi_spec_version >= sbi_mk_version(2, 0) && + sbi_probe_extension(SBI_EXT_DBCN) > 0) { pr_info("SBI DBCN extension detected\n"); sbi_debug_console_available = true; } From 99cf5b7c738733032af9a265a6a5a6bc34b91900 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 23 May 2025 12:19:20 +0200 Subject: [PATCH 26/69] riscv: sbi: add new SBI error mappings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few new errors have been added with SBI V3.0; map them as closely as possible to errno values. Signed-off-by: Clément Léger Reviewed-by: Andrew Jones Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20250523101932.1594077-4-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/sbi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index bb077d0c912f..0938f2a8d01b 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -536,11 +536,21 @@ static inline int sbi_err_map_linux_errno(int err) case SBI_SUCCESS: return 0; case SBI_ERR_DENIED: + case SBI_ERR_DENIED_LOCKED: return -EPERM; case SBI_ERR_INVALID_PARAM: + case SBI_ERR_INVALID_STATE: return -EINVAL; + case SBI_ERR_BAD_RANGE: + return -ERANGE; case SBI_ERR_INVALID_ADDRESS: return -EFAULT; + case SBI_ERR_NO_SHMEM: + return -ENOMEM; + case SBI_ERR_TIMEOUT: + return -ETIMEDOUT; + case SBI_ERR_IO: + return -EIO; case SBI_ERR_NOT_SUPPORTED: case SBI_ERR_FAILURE: default: From 6d6d0641dcfa9d1e398d75791283bf6d129135de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 23 May 2025 12:19:21 +0200 Subject: [PATCH 27/69] riscv: sbi: add FWFT extension interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This SBI extension enables supervisor mode to control features that are under M-mode control (for instance, the Svadu menvcfg ADUE bit, the Ssdbltrp DTE, etc.). Add an interface to set local features for a specific cpu mask as well as for the online cpu mask.
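For illustration (not part of this patch), a typical caller of the new interface would look like:

  /* Sketch: enable hardware A/D bit updating on every online hart;
   * SBI_FWFT_PTE_AD_HW_UPDATING is a local (per-hart) feature. */
  int ret = sbi_fwft_set_online_cpus(SBI_FWFT_PTE_AD_HW_UPDATING, 1, 0);

  if (ret)
          pr_err("FWFT: enabling PTE A/D updating failed: %d\n", ret);
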
Signed-off-by: Clément Léger Reviewed-by: Andrew Jones Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20250523101932.1594077-5-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/sbi.h | 17 +++++++++++ arch/riscv/kernel/sbi.c | 57 ++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 0938f2a8d01b..341e74238aa0 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -503,6 +503,23 @@ int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask, unsigned long asid); long sbi_probe_extension(int ext); +int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags); +int sbi_fwft_set_cpumask(const cpumask_t *mask, u32 feature, + unsigned long value, unsigned long flags); +/** + * sbi_fwft_set_online_cpus() - Set a feature on all online cpus + * @feature: The feature to be set + * @value: The feature value to be set + * @flags: FWFT feature set flags + * + * Return: 0 on success, appropriate linux error code otherwise. + */ +static inline int sbi_fwft_set_online_cpus(u32 feature, unsigned long value, + unsigned long flags) +{ + return sbi_fwft_set_cpumask(cpu_online_mask, feature, value, flags); +} + /* Check if current SBI specification version is 0.1 or not */ static inline int sbi_spec_is_0_1(void) { diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index 1d44c35305a9..818efafdc8e9 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -299,6 +299,63 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, return 0; } +struct fwft_set_req { + u32 feature; + unsigned long value; + unsigned long flags; + atomic_t error; +}; + +static void cpu_sbi_fwft_set(void *arg) +{ + struct fwft_set_req *req = arg; + int ret; + + ret = sbi_fwft_set(req->feature, req->value, req->flags); + if (ret) + atomic_set(&req->error, ret); +} + +/** + * sbi_fwft_set() - Set a feature on the local hart + * @feature: The feature ID to be set + * @value: The feature value to be set + * @flags: FWFT feature set flags + * + * Return: 0 on success, appropriate linux error code otherwise. + */ +int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags) +{ + return -EOPNOTSUPP; +} + +/** + * sbi_fwft_set_cpumask() - Set a feature for the specified cpumask + * @mask: CPU mask of cpus that need the feature to be set + * @feature: The feature ID to be set + * @value: The feature value to be set + * @flags: FWFT feature set flags + * + * Return: 0 on success, appropriate linux error code otherwise. + */ +int sbi_fwft_set_cpumask(const cpumask_t *mask, u32 feature, + unsigned long value, unsigned long flags) +{ + struct fwft_set_req req = { + .feature = feature, + .value = value, + .flags = flags, + .error = ATOMIC_INIT(0), + }; + + if (feature & SBI_FWFT_GLOBAL_FEATURE_BIT) + return -EINVAL; + + on_each_cpu_mask(mask, cpu_sbi_fwft_set, &req, 1); + + return atomic_read(&req.error); +} + /** * sbi_set_timer() - Program the timer for next timer event. * @stime_value: The value after which next timer event should fire. From c4a50db1e1739a5d4698dee7cd4c6f6430bff7b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 23 May 2025 12:19:22 +0200 Subject: [PATCH 28/69] riscv: sbi: add SBI FWFT extension calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add FWFT extension calls. 
This extension will be ratified in SBI V3.0; hence, it is provided as a separate commit that can be left out if needed. Signed-off-by: Clément Léger Reviewed-by: Andrew Jones Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20250523101932.1594077-6-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sbi.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index 818efafdc8e9..53836a9235e3 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -299,6 +299,8 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, return 0; } +static bool sbi_fwft_supported; + struct fwft_set_req { u32 feature; unsigned long value; @@ -326,7 +328,15 @@ static void cpu_sbi_fwft_set(void *arg) */ int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags) { - return -EOPNOTSUPP; + struct sbiret ret; + + if (!sbi_fwft_supported) + return -EOPNOTSUPP; + + ret = sbi_ecall(SBI_EXT_FWFT, SBI_EXT_FWFT_SET, + feature, value, flags, 0, 0, 0); + + return sbi_err_map_linux_errno(ret.error); } /** @@ -348,6 +358,9 @@ int sbi_fwft_set_cpumask(const cpumask_t *mask, u32 feature, .error = ATOMIC_INIT(0), }; + if (!sbi_fwft_supported) + return -EOPNOTSUPP; + if (feature & SBI_FWFT_GLOBAL_FEATURE_BIT) return -EINVAL; @@ -679,6 +692,11 @@ void __init sbi_init(void) pr_info("SBI DBCN extension detected\n"); sbi_debug_console_available = true; } + if (sbi_spec_version >= sbi_mk_version(3, 0) && + sbi_probe_extension(SBI_EXT_FWFT)) { + pr_info("SBI FWFT extension detected\n"); + sbi_fwft_supported = true; + } } else { __sbi_set_timer = __sbi_set_timer_v01; __sbi_send_ipi = __sbi_send_ipi_v01; From cf5a8abc6560f989a880bec3647c614e638a0c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 23 May 2025 12:19:23 +0200 Subject: [PATCH 29/69] riscv: misaligned: request misaligned exception from SBI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the kernel can handle misaligned accesses in S-mode, request misaligned access exception delegation from SBI. This uses the FWFT SBI extension defined in SBI version 3.0.
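As an illustration of what is being delegated (a standalone sketch, not part of the patch), the probe in check_unaligned_access_emulated() boils down to a load from an odd address, which traps either to M-mode firmware or, once delegation succeeds, straight to the kernel's S-mode handler:

  unsigned long val, tmp_var = 0;

  /* 1(%1) is not naturally aligned, so on cores without hardware
   * misaligned support this rv64 load takes an exception that
   * somebody (SBI firmware or the kernel) has to emulate. */
  asm volatile ("ld %0, 1(%1)" : "=r" (val) : "r" (&tmp_var) : "memory");
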
Signed-off-by: Clément Léger Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20250523101932.1594077-7-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cpufeature.h | 3 +- arch/riscv/kernel/traps_misaligned.c | 71 +++++++++++++++++++++- arch/riscv/kernel/unaligned_access_speed.c | 8 ++- 3 files changed, 77 insertions(+), 5 deletions(-) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index f56b409361fb..dbe5970d4fe6 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -67,8 +67,9 @@ void __init riscv_user_isa_enable(void); _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate) bool __init check_unaligned_access_emulated_all_cpus(void); +void unaligned_access_init(void); +int cpu_online_unaligned_access_init(unsigned int cpu); #if defined(CONFIG_RISCV_SCALAR_MISALIGNED) -void check_unaligned_access_emulated(struct work_struct *work __always_unused); void unaligned_emulation_finish(void); bool unaligned_ctl_available(void); DECLARE_PER_CPU(long, misaligned_access_speed); diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 77c788660223..592b1a28e897 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #define INSN_MATCH_LB 0x3 @@ -646,7 +647,7 @@ bool __init check_vector_unaligned_access_emulated_all_cpus(void) static bool unaligned_ctl __read_mostly; -void check_unaligned_access_emulated(struct work_struct *work __always_unused) +static void check_unaligned_access_emulated(struct work_struct *work __always_unused) { int cpu = smp_processor_id(); long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); @@ -657,6 +658,13 @@ void check_unaligned_access_emulated(struct work_struct *work __always_unused) __asm__ __volatile__ ( " "REG_L" %[tmp], 1(%[ptr])\n" : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); +} + +static int cpu_online_check_unaligned_access_emulated(unsigned int cpu) +{ + long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); + + check_unaligned_access_emulated(NULL); /* * If unaligned_ctl is already set, this means that we detected that all @@ -665,9 +673,10 @@ void check_unaligned_access_emulated(struct work_struct *work __always_unused) */ if (unlikely(unaligned_ctl && (*mas_ptr != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED))) { pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n"); - while (true) - cpu_relax(); + return -EINVAL; } + + return 0; } bool __init check_unaligned_access_emulated_all_cpus(void) @@ -699,4 +708,60 @@ bool __init check_unaligned_access_emulated_all_cpus(void) { return false; } +static int cpu_online_check_unaligned_access_emulated(unsigned int cpu) +{ + return 0; +} #endif + +#ifdef CONFIG_RISCV_SBI + +static bool misaligned_traps_delegated; + +static int cpu_online_sbi_unaligned_setup(unsigned int cpu) +{ + if (sbi_fwft_set(SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0) && + misaligned_traps_delegated) { + pr_crit("Misaligned trap delegation non homogeneous (expected delegated)"); + return -EINVAL; + } + + return 0; +} + +void __init unaligned_access_init(void) +{ + int ret; + + ret = sbi_fwft_set_online_cpus(SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0); + if (ret) + return; + + misaligned_traps_delegated = true; + pr_info("SBI misaligned access exception delegation ok\n"); + /* + * Note that we don't have to take any specific action here, if + * the 
delegation is successful, then + * check_unaligned_access_emulated() will verify that indeed the + * platform traps on misaligned accesses. + */ +} +#else +void __init unaligned_access_init(void) {} + +static int cpu_online_sbi_unaligned_setup(unsigned int cpu __always_unused) +{ + return 0; +} +#endif + +int cpu_online_unaligned_access_init(unsigned int cpu) +{ + int ret; + + ret = cpu_online_sbi_unaligned_setup(cpu); + if (ret) + return ret; + + return cpu_online_check_unaligned_access_emulated(cpu); +} diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index b8ba13819d05..ae2068425fbc 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -236,6 +236,11 @@ arch_initcall_sync(lock_and_set_unaligned_access_static_branch); static int riscv_online_cpu(unsigned int cpu) { + int ret = cpu_online_unaligned_access_init(cpu); + + if (ret) + return ret; + /* We are already set since the last check */ if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { goto exit; @@ -248,7 +253,6 @@ static int riscv_online_cpu(unsigned int cpu) { static struct page *buf; - check_unaligned_access_emulated(NULL); buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); if (!buf) { pr_warn("Allocation failure, not measuring misaligned performance\n"); @@ -439,6 +443,8 @@ static int __init check_unaligned_access_all_cpus(void) { int cpu; + unaligned_access_init(); + if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { pr_info("scalar unaligned access speed set to '%s' (%lu) by command line\n", speed_str[unaligned_scalar_speed_param], unaligned_scalar_speed_param); From 9f9f6fdd1dc6791bcfe251160a96a446199f85ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 23 May 2025 12:19:24 +0200 Subject: [PATCH 30/69] riscv: misaligned: use on_each_cpu() for scalar misaligned access probing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit schedule_on_each_cpu() was used without any good reason despite being documented as very slow. This call was in the boot path, so better use on_each_cpu() for the scalar misaligned check. The vector misaligned check still needs to use schedule_on_each_cpu() since it requires irqs to be enabled, but that's less of a problem since that code is run in a kthread. Add a comment to make that explicit.
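For context (illustration only): on_each_cpu() runs the callback on every CPU from IPI context with interrupts disabled, which is fine for the short scalar probe but rules out anything that sleeps or re-enables irqs:

  static void probe_cb(void *unused)
  {
          /* Runs in interrupt context with irqs off: must not sleep
           * and must not call kernel_vector_begin(), which is why the
           * vector probe keeps using schedule_on_each_cpu(). */
  }

  on_each_cpu(probe_cb, NULL, 1);  /* 1 == wait for all CPUs */
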
Signed-off-by: Clément Léger Reviewed-by: Andrew Jones Reviewed-by: Charlie Jenkins Tested-by: Charlie Jenkins Link: https://lore.kernel.org/r/20250523101932.1594077-8-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps_misaligned.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 592b1a28e897..34b4a4e9dfca 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -627,6 +627,10 @@ bool __init check_vector_unaligned_access_emulated_all_cpus(void) { int cpu; + /* + * While being documented as very slow, schedule_on_each_cpu() is used since + * kernel_vector_begin() expects irqs to be enabled or it will panic() + */ schedule_on_each_cpu(check_vector_unaligned_access_emulated); for_each_online_cpu(cpu) @@ -647,7 +651,7 @@ bool __init check_vector_unaligned_access_emulated_all_cpus(void) static bool unaligned_ctl __read_mostly; -static void check_unaligned_access_emulated(struct work_struct *work __always_unused) +static void check_unaligned_access_emulated(void *arg __always_unused) { int cpu = smp_processor_id(); long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); @@ -688,7 +692,7 @@ bool __init check_unaligned_access_emulated_all_cpus(void) * accesses emulated since tasks requesting such control can run on any * CPU. */ - schedule_on_each_cpu(check_unaligned_access_emulated); + on_each_cpu(check_unaligned_access_emulated, NULL, 1); for_each_online_cpu(cpu) if (per_cpu(misaligned_access_speed, cpu) From 1317045a7d6f397904d105f6d40dc9787876a34b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 23 May 2025 12:19:25 +0200 Subject: [PATCH 31/69] riscv: misaligned: declare misaligned_access_speed under CONFIG_RISCV_MISALIGNED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While misaligned_access_speed was defined in a file compiled with CONFIG_RISCV_MISALIGNED, its declaration was guarded by CONFIG_RISCV_SCALAR_MISALIGNED. This resulted in compilation problems when using it in a file compiled with only CONFIG_RISCV_MISALIGNED. Move the declaration under CONFIG_RISCV_MISALIGNED so that it can be used unconditionally when compiled with that config, and remove the check for that variable in traps_misaligned.c.
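To make the failure mode concrete (a hypothetical illustration, not part of the patch): a file built with CONFIG_RISCV_MISALIGNED=y but CONFIG_RISCV_SCALAR_MISALIGNED=n previously could not see the declaration:

  #include <asm/cpufeature.h>

  /* Before this patch, this failed to build because the
   * DECLARE_PER_CPU() was hidden behind
   * CONFIG_RISCV_SCALAR_MISALIGNED: */
  long speed = *this_cpu_ptr(&misaligned_access_speed);
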
Signed-off-by: Clément Léger Reviewed-by: Charlie Jenkins Tested-by: Charlie Jenkins Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20250523101932.1594077-9-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cpufeature.h | 5 ++++- arch/riscv/kernel/traps_misaligned.c | 2 -- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index dbe5970d4fe6..2bfa4ef383ed 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -72,7 +72,6 @@ int cpu_online_unaligned_access_init(unsigned int cpu); #if defined(CONFIG_RISCV_SCALAR_MISALIGNED) void unaligned_emulation_finish(void); bool unaligned_ctl_available(void); -DECLARE_PER_CPU(long, misaligned_access_speed); #else static inline bool unaligned_ctl_available(void) { @@ -80,6 +79,10 @@ static inline bool unaligned_ctl_available(void) } #endif +#if defined(CONFIG_RISCV_MISALIGNED) +DECLARE_PER_CPU(long, misaligned_access_speed); +#endif + bool __init check_vector_unaligned_access_emulated_all_cpus(void); #if defined(CONFIG_RISCV_VECTOR_MISALIGNED) void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused); diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 34b4a4e9dfca..f1b2af515592 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -369,9 +369,7 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs) perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); -#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; -#endif if (!unaligned_enabled) return -1; From 4eaaa65e301208d6ff612ad2244c6174c9d852b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 23 May 2025 12:19:26 +0200 Subject: [PATCH 32/69] riscv: misaligned: move emulated access uniformity check into a function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the code that checks for the uniformity of misaligned access performance on all cpus out of check_unaligned_access_emulated_all_cpus() and into its own function, which will be used for the delegation check. No functional changes intended.
Signed-off-by: Clément Léger Reviewed-by: Andrew Jones Reviewed-by: Charlie Jenkins Tested-by: Charlie Jenkins Link: https://lore.kernel.org/r/20250523101932.1594077-10-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps_misaligned.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index f1b2af515592..7ecaa8103fe7 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -645,6 +645,18 @@ bool __init check_vector_unaligned_access_emulated_all_cpus(void) } #endif +static bool all_cpus_unaligned_scalar_access_emulated(void) +{ + int cpu; + + for_each_online_cpu(cpu) + if (per_cpu(misaligned_access_speed, cpu) != + RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED) + return false; + + return true; +} + #ifdef CONFIG_RISCV_SCALAR_MISALIGNED static bool unaligned_ctl __read_mostly; @@ -683,8 +695,6 @@ static int cpu_online_check_unaligned_access_emulated(unsigned int cpu) bool __init check_unaligned_access_emulated_all_cpus(void) { - int cpu; - /* * We can only support PR_UNALIGN controls if all CPUs have misaligned * accesses emulated since tasks requesting such control can run on any @@ -692,10 +702,8 @@ bool __init check_unaligned_access_emulated_all_cpus(void) */ on_each_cpu(check_unaligned_access_emulated, NULL, 1); - for_each_online_cpu(cpu) - if (per_cpu(misaligned_access_speed, cpu) - != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED) - return false; + if (!all_cpus_unaligned_scalar_access_emulated()) + return false; unaligned_ctl = true; return true; From 7977448bf374f6e9592153838f072a89bd3b5c45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 23 May 2025 12:19:27 +0200 Subject: [PATCH 33/69] riscv: misaligned: add a function to check misalign trap delegability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Checking for the delegability of the misaligned access trap is needed for the KVM FWFT extension implementation. Add a function to get the delegability of the misaligned trap exception. 
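For illustration, the intended consumer pattern (the actual KVM wiring lands in a separate series) would be along these lines:

  /* Sketch: only expose misaligned-trap delegation to a guest when
   * the host can actually guarantee it. */
  if (!misaligned_traps_can_delegate())
          return -EOPNOTSUPP;
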
Signed-off-by: Clément Léger Reviewed-by: Andrew Jones Reviewed-by: Charlie Jenkins Tested-by: Charlie Jenkins Link: https://lore.kernel.org/r/20250523101932.1594077-11-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cpufeature.h | 6 ++++++ arch/riscv/kernel/traps_misaligned.c | 17 +++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 2bfa4ef383ed..fbd0e4306c93 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -81,6 +81,12 @@ static inline bool unaligned_ctl_available(void) #if defined(CONFIG_RISCV_MISALIGNED) DECLARE_PER_CPU(long, misaligned_access_speed); +bool misaligned_traps_can_delegate(void); +#else +static inline bool misaligned_traps_can_delegate(void) +{ + return false; +} #endif bool __init check_vector_unaligned_access_emulated_all_cpus(void); diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 7ecaa8103fe7..93043924fe6c 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -724,10 +724,10 @@ static int cpu_online_check_unaligned_access_emulated(unsigned int cpu) } #endif -#ifdef CONFIG_RISCV_SBI - static bool misaligned_traps_delegated; +#ifdef CONFIG_RISCV_SBI + static int cpu_online_sbi_unaligned_setup(unsigned int cpu) { if (sbi_fwft_set(SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0) && @@ -763,6 +763,7 @@ static int cpu_online_sbi_unaligned_setup(unsigned int cpu __always_unused) { return 0; } + #endif int cpu_online_unaligned_access_init(unsigned int cpu) @@ -775,3 +776,15 @@ int cpu_online_unaligned_access_init(unsigned int cpu) return cpu_online_check_unaligned_access_emulated(cpu); } + +bool misaligned_traps_can_delegate(void) +{ + /* + * Either we successfully requested misaligned traps delegation for all + * CPUs, or the SBI does not implement the FWFT extension but delegated + * the exception by default. + */ + return misaligned_traps_delegated || + all_cpus_unaligned_scalar_access_emulated(); +} +EXPORT_SYMBOL_GPL(misaligned_traps_can_delegate); From f8693f6dffcd1865da7f5f4c3df1de445e519bec Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 8 Apr 2025 02:08:25 +0800 Subject: [PATCH 34/69] riscv: ftrace: support fastcc in Clang for WITH_ARGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some caller-saved registers which are not defined as function arguments in the ABI can still be passed as arguments when the kernel is compiled with Clang. As a result, we must save and restore those registers to prevent ftrace from clobbering them. 
- [1]: https://reviews.llvm.org/D68559 Reported-by: Evgenii Shatokhin Closes: https://lore.kernel.org/linux-riscv/7e7c7914-445d-426d-89a0-59a9199c45b1@yadro.com/ Fixes: 7caa9765465f ("ftrace: riscv: move from REGS to ARGS") Acked-by: Nathan Chancellor Reviewed-by: Björn Töpel Signed-off-by: Andy Chiu Tested-by: Björn Töpel Link: https://lore.kernel.org/r/20250407180838.42877-1-andybnac@gmail.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/ftrace.h | 7 +++++++ arch/riscv/kernel/asm-offsets.c | 7 +++++++ arch/riscv/kernel/mcount-dyn.S | 16 ++++++++++++++-- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index d627f63ee289..d8b2138bd9c6 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -146,6 +146,13 @@ struct __arch_ftrace_regs { unsigned long a5; unsigned long a6; unsigned long a7; +#ifdef CONFIG_CC_IS_CLANG + unsigned long t2; + unsigned long t3; + unsigned long t4; + unsigned long t5; + unsigned long t6; +#endif }; }; }; diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index 16490755304e..7c43c8e26ae7 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -501,6 +501,13 @@ void asm_offsets(void) DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp)); DEFINE(FREGS_S0, offsetof(struct __arch_ftrace_regs, s0)); DEFINE(FREGS_T1, offsetof(struct __arch_ftrace_regs, t1)); +#ifdef CONFIG_CC_IS_CLANG + DEFINE(FREGS_T2, offsetof(struct __arch_ftrace_regs, t2)); + DEFINE(FREGS_T3, offsetof(struct __arch_ftrace_regs, t3)); + DEFINE(FREGS_T4, offsetof(struct __arch_ftrace_regs, t4)); + DEFINE(FREGS_T5, offsetof(struct __arch_ftrace_regs, t5)); + DEFINE(FREGS_T6, offsetof(struct __arch_ftrace_regs, t6)); +#endif DEFINE(FREGS_A0, offsetof(struct __arch_ftrace_regs, a0)); DEFINE(FREGS_A1, offsetof(struct __arch_ftrace_regs, a1)); DEFINE(FREGS_A2, offsetof(struct __arch_ftrace_regs, a2)); diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 745dd4c4a69c..e988bd26b28b 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -96,7 +96,13 @@ REG_S x8, FREGS_S0(sp) #endif REG_S x6, FREGS_T1(sp) - +#ifdef CONFIG_CC_IS_CLANG + REG_S x7, FREGS_T2(sp) + REG_S x28, FREGS_T3(sp) + REG_S x29, FREGS_T4(sp) + REG_S x30, FREGS_T5(sp) + REG_S x31, FREGS_T6(sp) +#endif // save the arguments REG_S x10, FREGS_A0(sp) REG_S x11, FREGS_A1(sp) @@ -115,7 +121,13 @@ REG_L x8, FREGS_S0(sp) #endif REG_L x6, FREGS_T1(sp) - +#ifdef CONFIG_CC_IS_CLANG + REG_L x7, FREGS_T2(sp) + REG_L x28, FREGS_T3(sp) + REG_L x29, FREGS_T4(sp) + REG_L x30, FREGS_T5(sp) + REG_L x31, FREGS_T6(sp) +#endif // restore the arguments REG_L x10, FREGS_A0(sp) REG_L x11, FREGS_A1(sp) From 54ecbc8d857122b32a98fe204cb61a06aabc063d Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 8 Apr 2025 02:08:26 +0800 Subject: [PATCH 35/69] riscv: ftrace factor out code defined by !WITH_ARG DYNAMIC_FTRACE selects DYNAMIC_FTRACE_WITH_ARGS and mcount-dyn.S in riscv, so we can remove ifdef jargons of WITH_ARG when it is known that DYNAMIC_FTRACE is true. 
Signed-off-by: Andy Chiu Link: https://lore.kernel.org/r/20250407180838.42877-2-andybnac@gmail.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/ftrace.c | 15 --------------- arch/riscv/kernel/mcount-dyn.S | 34 ---------------------------------- 2 files changed, 49 deletions(-) diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 674dcdfae7a1..1fd10555c580 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -210,7 +210,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, } #ifdef CONFIG_DYNAMIC_FTRACE -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { @@ -231,19 +230,5 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, if (!function_graph_enter_regs(old, ip, frame_pointer, parent, fregs)) *parent = return_hooker; } -#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ -extern void ftrace_graph_call(void); -int ftrace_enable_ftrace_graph_caller(void) -{ - return __ftrace_modify_call((unsigned long)&ftrace_graph_call, - (unsigned long)&prepare_ftrace_return, true, true); -} - -int ftrace_disable_ftrace_graph_caller(void) -{ - return __ftrace_modify_call((unsigned long)&ftrace_graph_call, - (unsigned long)&prepare_ftrace_return, false, true); -} -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index e988bd26b28b..3f06b40bb6c8 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -56,8 +56,6 @@ addi sp, sp, ABI_SIZE_ON_STACK .endm -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS - /** * SAVE_ABI_REGS - save regs against the ftrace_regs struct * @@ -149,36 +147,6 @@ mv a3, sp .endm -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ - -#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS -SYM_FUNC_START(ftrace_caller) - SAVE_ABI - - addi a0, t0, -FENTRY_RA_OFFSET - la a1, function_trace_op - REG_L a2, 0(a1) - mv a1, ra - mv a3, sp - -SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) - call ftrace_stub - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - addi a0, sp, ABI_RA - REG_L a1, ABI_T0(sp) - addi a1, a1, -FENTRY_RA_OFFSET -#ifdef HAVE_FUNCTION_GRAPH_FP_TEST - mv a2, s0 -#endif -SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) - call ftrace_stub -#endif - RESTORE_ABI - jr t0 -SYM_FUNC_END(ftrace_caller) - -#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ SYM_FUNC_START(ftrace_caller) mv t1, zero SAVE_ABI_REGS @@ -194,8 +162,6 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) jr t1 SYM_FUNC_END(ftrace_caller) -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ - #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS SYM_CODE_START(ftrace_stub_direct_tramp) jr t0 From c41bf4326c7b0af3f7004235ac91551833ec95c6 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 8 Apr 2025 02:08:27 +0800 Subject: [PATCH 36/69] riscv: ftrace: align patchable functions to a 4-byte boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are changing ftrace code patching in order to remove the dependency on stop_machine() and enable kernel preemption. This requires us to align function entries on a 4-byte boundary. However, -falign-functions alone on older versions of GCC was not strong enough to align all functions. In fact, cold functions are not aligned after turning on optimizations.
We consider this a bug in GCC and turn off guess-branch-probability as a workaround to align all functions. GCC bug id: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345 The option -fmin-function-alignment is able to align all functions properly on newer versions of gcc. So, we add a cc-option to test if the toolchain supports it. Suggested-by: Evgenii Shatokhin Signed-off-by: Andy Chiu Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20250407180838.42877-3-andybnac@gmail.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bbec87b79309..7dbed10843d2 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -150,6 +150,7 @@ config RISCV select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) + select FUNCTION_ALIGNMENT_4B if HAVE_DYNAMIC_FTRACE && RISCV_ISA_C select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_GRAPH_FUNC @@ -236,6 +237,7 @@ config CLANG_SUPPORTS_DYNAMIC_FTRACE config GCC_SUPPORTS_DYNAMIC_FTRACE def_bool CC_IS_GCC depends on $(cc-option,-fpatchable-function-entry=8) + depends on CC_HAS_MIN_FUNCTION_ALIGNMENT || !RISCV_ISA_C config HAVE_SHADOW_CALL_STACK def_bool $(cc-option,-fsanitize=shadow-call-stack) From 500e626c4a5bf2fa7cc6f1cd6dd86c77b5b127f2 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 8 Apr 2025 02:08:28 +0800 Subject: [PATCH 37/69] kernel: ftrace: export ftrace_sync_ipi The following ftrace patch for riscv uses a data store to update the ftrace function. Therefore, a remote fence is required to order it against function_trace_op updates. The mechanism is similar to the fence between function_trace_op and update_ftrace_func in the generic ftrace, so we leverage the same ftrace_sync_ipi function. [ alex: Fix build warning when !CONFIG_DYNAMIC_FTRACE ] Signed-off-by: Andy Chiu Link: https://lore.kernel.org/r/20250407180838.42877-4-andybnac@gmail.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- include/linux/ftrace.h | 2 ++ kernel/trace/ftrace.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fbabc3d848b3..30374478cb07 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -635,6 +635,8 @@ enum { #define ftrace_get_symaddr(fentry_ip) (0) #endif +void ftrace_sync_ipi(void *data); + #ifdef CONFIG_DYNAMIC_FTRACE void ftrace_arch_code_modify_prepare(void); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 61130bb34d6c..31e9fe3bf964 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -188,7 +188,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, op->saved_func(ip, parent_ip, op, fregs); } -static void ftrace_sync_ipi(void *data) +void ftrace_sync_ipi(void *data) { /* Probably not needed, but do it anyway */ smp_rmb(); From b2137c3b6d7a45622967aac78b46a4bd2cff6c42 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 8 Apr 2025 02:08:29 +0800 Subject: [PATCH 38/69] riscv: ftrace: prepare ftrace for atomic code patching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We use an AUIPC+JALR pair to jump into an ftrace trampoline. Since instruction fetch can break down to 4 bytes at a time, it is impossible to update the two instructions without a race.
In order to mitigate it, we initialize the patchable entry to AUIPC + NOP4. Then, the run-time code patching can change the NOP4 to a JALR to enable/disable ftrace for a function. This limits the reach of each ftrace entry to +-2KB of displacement from ftrace_caller. Starting from the trampoline, we add a level of indirection for it to reach the ftrace caller target. Now, it loads the target address from a memory location, then performs the jump. This enables the kernel to update the target atomically. The new don't-stop-the-world text patching changes only one RISC-V instruction: | -8: &ftrace_ops of the associated tracer function. | : | 0: auipc t0, hi(ftrace_caller) | 4: jalr t0, lo(ftrace_caller) | | -8: &ftrace_nop_ops | : | 0: auipc t0, hi(ftrace_caller) | 4: nop This means that f+0x0 is fixed, and should not be claimed by ftrace, e.g. kprobe should be able to put a probe in f+0x0. Thus, we adjust the offset and MCOUNT_INSN_SIZE accordingly. [ alex: Fix build errors with !CONFIG_DYNAMIC_FTRACE ] Co-developed-by: Björn Töpel Signed-off-by: Björn Töpel Signed-off-by: Andy Chiu Link: https://lore.kernel.org/r/20250407180838.42877-5-andybnac@gmail.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/ftrace.h | 49 ++++++------ arch/riscv/kernel/ftrace.c | 135 +++++++++++++++++--------------- arch/riscv/kernel/mcount-dyn.S | 9 +-- 3 files changed, 97 insertions(+), 96 deletions(-) diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index d8b2138bd9c6..6a5c0a7fb826 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -20,10 +20,9 @@ extern void *return_address(unsigned int level); #define ftrace_return_address(n) return_address(n) void _mcount(void); -static inline unsigned long ftrace_call_adjust(unsigned long addr) -{ - return addr; -} +unsigned long ftrace_call_adjust(unsigned long addr); +unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip); +#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip) /* * Let's do like x86/arm64 and ignore the compat syscalls. @@ -57,12 +56,21 @@ struct dyn_arch_ftrace { * 2) jalr: setting low-12 offset to ra, jump to ra, and set ra to * return address (original pc + 4) * + * The first 2 instructions for each tracable function is compiled to 2 nop + * instructions. Then, the kernel initializes the first instruction to auipc at + * boot time (). The second instruction is patched to jalr to + * start the trace. + * + *: + * 0: nop + * 4: nop + * *: - * 0: auipc t0/ra, 0x? - * 4: jalr t0/ra, ?(t0/ra) + * 0: auipc t0, 0x? + * 4: jalr t0, ?(t0) * *: - * 0: nop + * 0: auipc t0, 0x? * 4: nop * * Dynamic ftrace generates probes to call sites, so we must deal with @@ -75,10 +83,9 @@ struct dyn_arch_ftrace { #define AUIPC_OFFSET_MASK (0xfffff000) #define AUIPC_PAD (0x00001000) #define JALR_SHIFT 20 -#define JALR_RA (0x000080e7) -#define AUIPC_RA (0x00000097) #define JALR_T0 (0x000282e7) #define AUIPC_T0 (0x00000297) +#define JALR_RANGE (JALR_SIGN_MASK - 1) #define to_jalr_t0(offset) \ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0) @@ -96,26 +103,14 @@ do { \ call[1] = to_jalr_t0(offset); \ } while (0) -#define to_jalr_ra(offset) \ - (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_RA) - -#define to_auipc_ra(offset) \ - ((offset & JALR_SIGN_MASK) ?
\ - (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_RA) : \ - ((offset & AUIPC_OFFSET_MASK) | AUIPC_RA)) - -#define make_call_ra(caller, callee, call) \ -do { \ - unsigned int offset = \ - (unsigned long) (callee) - (unsigned long) (caller); \ - call[0] = to_auipc_ra(offset); \ - call[1] = to_jalr_ra(offset); \ -} while (0) - /* - * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here. + * Only the jalr insn in the auipc+jalr is patched, so we make it 4 + * bytes here. */ -#define MCOUNT_INSN_SIZE 8 +#define MCOUNT_INSN_SIZE 4 +#define MCOUNT_AUIPC_SIZE 4 +#define MCOUNT_JALR_SIZE 4 +#define MCOUNT_NOP4_SIZE 4 #ifndef __ASSEMBLY__ struct dyn_ftrace; diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 1fd10555c580..ea04f09f9d4d 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -8,11 +8,22 @@ #include #include #include +#include #include #include #include #ifdef CONFIG_DYNAMIC_FTRACE +unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr + MCOUNT_AUIPC_SIZE; +} + +unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip) +{ + return fentry_ip - MCOUNT_AUIPC_SIZE; +} + void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) { mutex_lock(&text_mutex); @@ -32,51 +43,32 @@ void ftrace_arch_code_modify_post_process(void) __releases(&text_mutex) mutex_unlock(&text_mutex); } -static int ftrace_check_current_call(unsigned long hook_pos, - unsigned int *expected) +static int __ftrace_modify_call(unsigned long source, unsigned long target, bool validate) { + unsigned int call[2], offset; unsigned int replaced[2]; - unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4}; - /* we expect nops at the hook position */ - if (!expected) - expected = nops; + offset = target - source; + call[1] = to_jalr_t0(offset); - /* - * Read the text we want to modify; - * return must be -EFAULT on read error - */ - if (copy_from_kernel_nofault(replaced, (void *)hook_pos, - MCOUNT_INSN_SIZE)) - return -EFAULT; + if (validate) { + call[0] = to_auipc_t0(offset); + /* + * Read the text we want to modify; + * return must be -EFAULT on read error + */ + if (copy_from_kernel_nofault(replaced, (void *)source, 2 * MCOUNT_INSN_SIZE)) + return -EFAULT; - /* - * Make sure it is what we expect it to be; - * return must be -EINVAL on failed comparison - */ - if (memcmp(expected, replaced, sizeof(replaced))) { - pr_err("%p: expected (%08x %08x) but got (%08x %08x)\n", - (void *)hook_pos, expected[0], expected[1], replaced[0], - replaced[1]); - return -EINVAL; + if (replaced[0] != call[0]) { + pr_err("%p: expected (%08x) but got (%08x)\n", + (void *)source, call[0], replaced[0]); + return -EINVAL; + } } - return 0; -} - -static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, - bool enable, bool ra) -{ - unsigned int call[2]; - unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4}; - - if (ra) - make_call_ra(hook_pos, target, call); - else - make_call_t0(hook_pos, target, call); - - /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */ - if (patch_insn_write((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE)) + /* Replace the jalr at once. Return -EPERM on write error. 
*/ + if (patch_insn_write((void *)(source + MCOUNT_AUIPC_SIZE), call + 1, MCOUNT_JALR_SIZE)) return -EPERM; return 0; @@ -84,22 +76,21 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned int call[2]; + unsigned long distance, orig_addr, pc = rec->ip - MCOUNT_AUIPC_SIZE; - make_call_t0(rec->ip, addr, call); + orig_addr = (unsigned long)&ftrace_caller; + distance = addr > orig_addr ? addr - orig_addr : orig_addr - addr; + if (distance > JALR_RANGE) + return -EINVAL; - if (patch_insn_write((void *)rec->ip, call, MCOUNT_INSN_SIZE)) - return -EPERM; - - return 0; + return __ftrace_modify_call(pc, addr, false); } -int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, - unsigned long addr) +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4}; + u32 nop4 = RISCV_INSN_NOP4; - if (patch_insn_write((void *)rec->ip, nops, MCOUNT_INSN_SIZE)) + if (patch_insn_write((void *)rec->ip, &nop4, MCOUNT_NOP4_SIZE)) return -EPERM; return 0; @@ -114,21 +105,38 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, */ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) { - int out; + unsigned long pc = rec->ip - MCOUNT_AUIPC_SIZE; + unsigned int nops[2], offset; + int ret; + + offset = (unsigned long) &ftrace_caller - pc; + nops[0] = to_auipc_t0(offset); + nops[1] = RISCV_INSN_NOP4; mutex_lock(&text_mutex); - out = ftrace_make_nop(mod, rec, MCOUNT_ADDR); + ret = patch_insn_write((void *)pc, nops, 2 * MCOUNT_INSN_SIZE); mutex_unlock(&text_mutex); - return out; + return ret; } +ftrace_func_t ftrace_call_dest = ftrace_stub; int ftrace_update_ftrace_func(ftrace_func_t func) { - int ret = __ftrace_modify_call((unsigned long)&ftrace_call, - (unsigned long)func, true, true); - - return ret; + WRITE_ONCE(ftrace_call_dest, func); + /* + * The data fence ensures that the update to ftrace_call_dest happens + * before the write to function_trace_op later in the generic ftrace. + * If the sequence is not enforced, then an old ftrace_call_dest may + * race loading a new function_trace_op set in ftrace_modify_all_code + * + * If we are in stop_machine, then we don't need to call remote fence + * as there is no concurrent read-side of ftrace_call_dest.
+ */ + smp_wmb(); + if (!irqs_disabled()) + smp_call_function(ftrace_sync_ipi, NULL, 1); + return 0; } struct ftrace_modify_param { @@ -166,23 +174,22 @@ void arch_ftrace_update_code(int command) stop_machine(__ftrace_modify_code, &param, cpu_online_mask); } -#endif +#else /* CONFIG_DYNAMIC_FTRACE */ +unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { + unsigned long caller = rec->ip - MCOUNT_AUIPC_SIZE; unsigned int call[2]; - unsigned long caller = rec->ip; - int ret; make_call_t0(caller, old_addr, call); - ret = ftrace_check_current_call(caller, call); - - if (ret) - return ret; - - return __ftrace_modify_call(caller, addr, true, false); + return __ftrace_modify_call(caller, addr, true); } #endif diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 3f06b40bb6c8..8aa554d56096 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -13,7 +13,6 @@ .text -#define FENTRY_RA_OFFSET 8 #define ABI_SIZE_ON_STACK 80 #define ABI_A0 0 #define ABI_A1 8 @@ -62,8 +61,7 @@ * After the stack is established, * * 0(sp) stores the PC of the traced function which can be accessed -* by &(fregs)->epc in tracing function. Note that the real -* function entry address should be computed with -FENTRY_RA_OFFSET. +* by &(fregs)->epc in tracing function. * * 8(sp) stores the function return address (i.e. parent IP) that * can be accessed by &(fregs)->ra in tracing function. @@ -140,7 +138,7 @@ .endm .macro PREPARE_ARGS - addi a0, t0, -FENTRY_RA_OFFSET + addi a0, t0, -MCOUNT_JALR_SIZE // ip (callsite's jalr insn) la a1, function_trace_op REG_L a2, 0(a1) mv a1, ra @@ -153,7 +151,8 @@ SYM_FUNC_START(ftrace_caller) PREPARE_ARGS SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) - call ftrace_stub + REG_L ra, ftrace_call_dest + jalr ra, 0(ra) RESTORE_ABI_REGS bnez t1, .Ldirect From 5aa4ef95588456df5a5563dd23892827f97fb14f Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 8 Apr 2025 02:08:30 +0800 Subject: [PATCH 39/69] riscv: ftrace: do not use stop_machine to update code Now it is safe to remove the dependency on stop_machine() for patching code in ftrace. Signed-off-by: Andy Chiu Link: https://lore.kernel.org/r/20250407180838.42877-6-andybnac@gmail.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/ftrace.c | 64 ++++++-------------------------------- 1 file changed, 10 insertions(+), 54 deletions(-) diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index ea04f09f9d4d..b133c60808fe 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -24,23 +24,13 @@ unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip) return fentry_ip - MCOUNT_AUIPC_SIZE; } -void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) +void arch_ftrace_update_code(int command) { mutex_lock(&text_mutex); - - /* - * The code sequences we use for ftrace can't be patched while the - * kernel is running, so we need to use stop_machine() to modify them - * for now. This doesn't play nice with text_mutex, we use this flag - * to elide the check.
- */ - riscv_patch_in_stop_machine = true; -} - -void ftrace_arch_code_modify_post_process(void) __releases(&text_mutex) -{ - riscv_patch_in_stop_machine = false; + command |= FTRACE_MAY_SLEEP; + ftrace_modify_all_code(command); mutex_unlock(&text_mutex); + flush_icache_all(); } static int __ftrace_modify_call(unsigned long source, unsigned long target, bool validate) @@ -129,51 +119,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func) * before the write to function_trace_op later in the generic ftrace. * If the sequence is not enforced, then an old ftrace_call_dest may * race loading a new function_trace_op set in ftrace_modify_all_code - * - * If we are in stop_machine, then we don't need to call remote fence - * as there is no concurrent read-side of ftrace_call_dest. */ smp_wmb(); - if (!irqs_disabled()) - smp_call_function(ftrace_sync_ipi, NULL, 1); + /* + * Updating ftrace does not take the stop_machine() path, so irqs should + * not be disabled. + */ + WARN_ON(irqs_disabled()); + smp_call_function(ftrace_sync_ipi, NULL, 1); return 0; } -struct ftrace_modify_param { - int command; - atomic_t cpu_count; -}; - -static int __ftrace_modify_code(void *data) -{ - struct ftrace_modify_param *param = data; - - if (atomic_inc_return(&param->cpu_count) == num_online_cpus()) { - ftrace_modify_all_code(param->command); - /* - * Make sure the patching store is effective *before* we - * increment the counter which releases all waiting CPUs - * by using the release variant of atomic increment. The - * release pairs with the call to local_flush_icache_all() - * on the waiting CPU. - */ - atomic_inc_return_release(&param->cpu_count); - } else { - while (atomic_read(&param->cpu_count) <= num_online_cpus()) - cpu_relax(); - - local_flush_icache_all(); - } - - return 0; -} - -void arch_ftrace_update_code(int command) -{ - struct ftrace_modify_param param = { command, ATOMIC_INIT(0) }; - - stop_machine(__ftrace_modify_code, &param, cpu_online_mask); -} #else /* CONFIG_DYNAMIC_FTRACE */ unsigned long ftrace_call_adjust(unsigned long addr) { return addr; } From d1049fc0de81bca3abbb35e8d4b8794170498b78 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 8 Apr 2025 02:08:30 +0800 Subject: [PATCH 40/69] riscv: vector: Support calling schedule() for preemptible Vector Each function entry implies a call to the ftrace infrastructure, and it may call into schedule() in some cases. So, it is possible for preemptible kernel-mode Vector to implicitly call into schedule(). Since all V-regs are caller-saved, it is possible to drop all V context when a thread voluntarily calls schedule(). Besides, we currently don't pass arguments through vector registers, so we don't have to save/restore V-regs in the ftrace trampoline. Signed-off-by: Andy Chiu Link: https://lore.kernel.org/r/20250407180838.42877-7-andybnac@gmail.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/processor.h | 5 +++++ arch/riscv/include/asm/vector.h | 22 +++++++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 5f56eb9d114a..9c1cc716b891 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -79,6 +79,10 @@ struct pt_regs; * Thus, the task does not own preempt_v. Any use of Vector will have to * save preempt_v, if dirty, and fallback to non-preemptible kernel-mode * Vector. + * - bit 29: The thread voluntarily calls schedule() while holding an active + * preempt_v.
All preempt_v context should be dropped in such a case because + * V-regs are caller-saved. Only sstatus.VS=ON is persisted across a + * schedule() call. * - bit 30: The in-kernel preempt_v context is saved, and requries to be * restored when returning to the context that owns the preempt_v. * - bit 31: The in-kernel preempt_v context is dirty, as signaled by the @@ -93,6 +97,7 @@ struct pt_regs; #define RISCV_PREEMPT_V 0x00000100 #define RISCV_PREEMPT_V_DIRTY 0x80000000 #define RISCV_PREEMPT_V_NEED_RESTORE 0x40000000 +#define RISCV_PREEMPT_V_IN_SCHEDULE 0x20000000 /* CPU-specific state of a task */ struct thread_struct { diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index e8a83f55be2b..45c9b426fcc5 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -120,6 +120,11 @@ static __always_inline void riscv_v_disable(void) csr_clear(CSR_SSTATUS, SR_VS); } +static __always_inline bool riscv_v_is_on(void) +{ + return !!(csr_read(CSR_SSTATUS) & SR_VS); +} + static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) { asm volatile ( @@ -366,6 +371,11 @@ static inline void __switch_to_vector(struct task_struct *prev, struct pt_regs *regs; if (riscv_preempt_v_started(prev)) { + if (riscv_v_is_on()) { + WARN_ON(prev->thread.riscv_v_flags & RISCV_V_CTX_DEPTH_MASK); + riscv_v_disable(); + prev->thread.riscv_v_flags |= RISCV_PREEMPT_V_IN_SCHEDULE; + } if (riscv_preempt_v_dirty(prev)) { __riscv_v_vstate_save(&prev->thread.kernel_vstate, prev->thread.kernel_vstate.datap); @@ -376,10 +386,16 @@ static inline void __switch_to_vector(struct task_struct *prev, riscv_v_vstate_save(&prev->thread.vstate, regs); } - if (riscv_preempt_v_started(next)) - riscv_preempt_v_set_restore(next); - else + if (riscv_preempt_v_started(next)) { + if (next->thread.riscv_v_flags & RISCV_PREEMPT_V_IN_SCHEDULE) { + next->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_IN_SCHEDULE; + riscv_v_enable(); + } else { + riscv_preempt_v_set_restore(next); + } + } else { riscv_v_vstate_set_restore(next, task_pt_regs(next)); + } } void riscv_v_vstate_ctrl_init(struct task_struct *tsk); From ca358692de41b273468e625f96926fa53e13bd8c Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 8 Apr 2025 02:08:31 +0800 Subject: [PATCH 41/69] riscv: add a data fence for CMODX in the kernel mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RISC-V spec explicitly calls out that a local fence.i is not enough for a code modification to be visible from a remote hart. In fact, it states: To make a store to instruction memory visible to all RISC-V harts, the writing hart also has to execute a data FENCE before requesting that all remote RISC-V harts execute a FENCE.I. Although current riscv drivers for IPI use ordered MMIO when sending IPIs in order to synchronize against previous csd writes, riscv does not restrict itself to any particular flavor of IPI. Any driver or firmware implementation that does not order data writes before the IPI may pose a risk of a code-modifying race. Thus, add a fence here to order data writes before making the IPI.
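The required sequence, as a hedged C-level sketch (RISCV_FENCE(w, o) appears in the diff below and patch_text_nosync() is the existing riscv text-patching helper, while ipi_to_remote_harts() is a hypothetical stand-in for the IPI driver's CLINT/IMSIC MMIO write):

static void cmodx_publish_insn(u32 *addr, u32 insn)
{
	patch_text_nosync(addr, &insn, sizeof(insn));	/* 1. data store to instruction memory */
	RISCV_FENCE(w, o);	/* 2. order the store before the IPI's MMIO write */
	ipi_to_remote_harts();	/* 3. remote harts execute fence.i on receipt */
}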
Signed-off-by: Andy Chiu Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20250407180838.42877-8-andybnac@gmail.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/cacheflush.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index b81672729887..b2e4b81763f8 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -24,7 +24,20 @@ void flush_icache_all(void) if (num_online_cpus() < 2) return; - else if (riscv_use_sbi_for_rfence()) + + /* + * Make sure all previous writes to the D$ are ordered before making + * the IPI. The RISC-V spec states that a hart must execute a data fence + * before triggering a remote fence.i in order to make the modification + * visible for remote harts. + * + * IPIs on RISC-V are triggered by MMIO writes to either CLINT or + * S-IMSIC, so the fence ensures previous data writes "happen before" + * the MMIO. + */ + RISCV_FENCE(w, o); + + if (riscv_use_sbi_for_rfence()) sbi_remote_fence_i(NULL); else on_each_cpu(ipi_remote_fence_i, NULL, 1); From d0262e907e2991ae09ca476281fc8cae3ec57850 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 8 Apr 2025 02:08:33 +0800 Subject: [PATCH 42/69] riscv: ftrace: support PREEMPT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now, we can safely enable dynamic ftrace with kernel preemption. Signed-off-by: Andy Chiu Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20250407180838.42877-9-andybnac@gmail.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7dbed10843d2..dc0fc11b6e96 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -157,7 +157,7 @@ config RISCV select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER if HAVE_DYNAMIC_FTRACE_WITH_ARGS select HAVE_FUNCTION_GRAPH_FREGS - select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION + select HAVE_FUNCTION_TRACER if !XIP_KERNEL select HAVE_EBPF_JIT if MMU select HAVE_GUP_FAST if MMU select HAVE_FUNCTION_ARG_ACCESS_API From c217157bcd1df434fdeaeb24b7c25ec31e15cf4a Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Tue, 8 Apr 2025 02:08:34 +0800 Subject: [PATCH 43/69] riscv: Implement HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables support for DYNAMIC_FTRACE_WITH_CALL_OPS on RISC-V. This allows each ftrace callsite to provide an ftrace_ops to the common ftrace trampoline, allowing each callsite to invoke distinct tracer functions without the need to fall back to list processing or to allocate custom trampolines for each callsite. This significantly speeds up cases where multiple distinct trace functions are used and callsites are mostly traced by a single tracer. The idea and most of the implementation are taken from arm64's implementation of the same feature. The idea is to place a pointer to the ftrace_ops as a literal at a fixed offset from the function entry point, which can be recovered by the common ftrace trampoline. We use -fpatchable-function-entry to reserve 8 bytes above the function entry by emitting two 4-byte or four 2-byte nops depending on the presence of CONFIG_RISCV_ISA_C. These 8 bytes are patched at runtime with a pointer to the associated ftrace_ops for that callsite.
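In rough C terms, binding a tracer to one call site then becomes a single aligned 8-byte store into that literal; a sketch under the same assumptions (set_callsite_ops() is illustrative, patch_text_nosync() is the existing riscv helper the real code uses):

static int set_callsite_ops(unsigned long func_entry, const struct ftrace_ops *ops)
{
	void *literal = (void *)(func_entry - 8);	/* the 8-byte slot above the entry */

	/* one naturally aligned 8-byte store, so a concurrent reader sees
	 * either the old or the new ops pointer, never a mix */
	return patch_text_nosync(literal, &ops, sizeof(ops));
}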
Functions are aligned to 8 bytes to make sure that the accesses to this literal are atomic. This approach allows for directly invoking ftrace_ops::func even for ftrace_ops which are dynamically-allocated (or part of a module), without going via ftrace_ops_list_func. We've benchmarked this with the ftrace_ops sample module on Spacemit K1 Jupiter: Without this patch: baseline (Linux rivos 6.14.0-09584-g7d06015d936c #3 SMP Sat Mar 29 +-----------------------+-----------------+----------------------------+ | Number of tracers | Total time (ns) | Per-call average time | |-----------------------+-----------------+----------------------------| | Relevant | Irrelevant | 100000 calls | Total (ns) | Overhead (ns) | |----------+------------+-----------------+------------+---------------| | 0 | 0 | 1357958 | 13 | - | | 0 | 1 | 1302375 | 13 | - | | 0 | 2 | 1302375 | 13 | - | | 0 | 10 | 1379084 | 13 | - | | 0 | 100 | 1302458 | 13 | - | | 0 | 200 | 1302333 | 13 | - | |----------+------------+-----------------+------------+---------------| | 1 | 0 | 13677833 | 136 | 123 | | 1 | 1 | 18500916 | 185 | 172 | | 1 | 2 | 22856459 | 228 | 215 | | 1 | 10 | 58824709 | 588 | 575 | | 1 | 100 | 505141584 | 5051 | 5038 | | 1 | 200 | 1580473126 | 15804 | 15791 | |----------+------------+-----------------+------------+---------------| | 1 | 0 | 13561000 | 135 | 122 | | 2 | 0 | 19707292 | 197 | 184 | | 10 | 0 | 67774750 | 677 | 664 | | 100 | 0 | 714123125 | 7141 | 7128 | | 200 | 0 | 1918065668 | 19180 | 19167 | +----------+------------+-----------------+------------+---------------+ Note: per-call overhead is estimated relative to the baseline case with 0 relevant tracers and 0 irrelevant tracers. With this patch: v4-rc4 (Linux rivos 6.14.0-09598-gd75747611c93 #4 SMP Sat Mar 29 +-----------------------+-----------------+----------------------------+ | Number of tracers | Total time (ns) | Per-call average time | |-----------------------+-----------------+----------------------------| | Relevant | Irrelevant | 100000 calls | Total (ns) | Overhead (ns) | |----------+------------+-----------------+------------+---------------| | 0 | 0 | 1459917 | 14 | - | | 0 | 1 | 1408000 | 14 | - | | 0 | 2 | 1383792 | 13 | - | | 0 | 10 | 1430709 | 14 | - | | 0 | 100 | 1383791 | 13 | - | | 0 | 200 | 1383750 | 13 | - | |----------+------------+-----------------+------------+---------------| | 1 | 0 | 5238041 | 52 | 38 | | 1 | 1 | 5228542 | 52 | 38 | | 1 | 2 | 5325917 | 53 | 40 | | 1 | 10 | 5299667 | 52 | 38 | | 1 | 100 | 5245250 | 52 | 39 | | 1 | 200 | 5238459 | 52 | 39 | |----------+------------+-----------------+------------+---------------| | 1 | 0 | 5239083 | 52 | 38 | | 2 | 0 | 19449417 | 194 | 181 | | 10 | 0 | 67718584 | 677 | 663 | | 100 | 0 | 709840708 | 7098 | 7085 | | 200 | 0 | 2203580626 | 22035 | 22022 | +----------+------------+-----------------+------------+---------------+ Note: per-call overhead is estimated relative to the baseline case with 0 relevant tracers and 0 irrelevant tracers. As can be seen from the above: a) Whenever there is a single relevant tracer function associated with a tracee, the overhead of invoking the tracer is constant, and does not scale with the number of tracers which are *not* associated with that tracee. b) The overhead for a single relevant tracer has dropped to ~1/3 of the overhead prior to this series (from 122ns to 38ns). This is largely due to permitting calls to dynamically-allocated ftrace_ops without going via ftrace_ops_list_func.
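The dispatch behind observations a) and b) can be modeled in C roughly as follows (a conceptual sketch of what ftrace_caller does in assembly further down; call_ops_dispatch() and its entry argument are illustrative):

static void call_ops_dispatch(unsigned long entry, unsigned long parent_ip, struct ftrace_regs *fregs)
{
	/* load the literal patched in just below the function entry */
	struct ftrace_ops *ops = *(struct ftrace_ops **)(entry - 8);

	/* invoke the per-callsite tracer directly, no global list walk */
	ops->func(entry, parent_ip, ops, fregs);
}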
Signed-off-by: Puranjay Mohan [update kconfig, asm, refactor] Signed-off-by: Andy Chiu Tested-by: Björn Töpel Link: https://lore.kernel.org/r/20250407180838.42877-10-andybnac@gmail.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 + arch/riscv/Makefile | 4 +- arch/riscv/kernel/asm-offsets.c | 3 ++ arch/riscv/kernel/ftrace.c | 67 +++++++++++++++++++++++++++++++++ arch/riscv/kernel/mcount-dyn.S | 35 +++++++++++++++-- 5 files changed, 105 insertions(+), 6 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index dc0fc11b6e96..ec986c9120e3 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -99,6 +99,7 @@ config RISCV select EDAC_SUPPORT select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE) select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE + select FUNCTION_ALIGNMENT_8B if DYNAMIC_FTRACE_WITH_CALL_OPS select GENERIC_ARCH_TOPOLOGY select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS_BROADCAST if SMP @@ -152,6 +153,7 @@ config RISCV select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) select FUNCTION_ALIGNMENT_4B if HAVE_DYNAMIC_FTRACE && RISCV_ISA_C select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG) select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_GRAPH_FUNC select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 539d2aef5cab..df57654a615e 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -15,9 +15,9 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y) LDFLAGS_vmlinux += --no-relax KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY ifeq ($(CONFIG_RISCV_ISA_C),y) - CC_FLAGS_FTRACE := -fpatchable-function-entry=4 + CC_FLAGS_FTRACE := -fpatchable-function-entry=8,4 else - CC_FLAGS_FTRACE := -fpatchable-function-entry=2 + CC_FLAGS_FTRACE := -fpatchable-function-entry=4,2 endif endif diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index 7c43c8e26ae7..2d96197a8abf 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -493,6 +493,9 @@ void asm_offsets(void) DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN)); OFFSET(STACKFRAME_FP, stackframe, fp); OFFSET(STACKFRAME_RA, stackframe, ra); +#ifdef CONFIG_FUNCTION_TRACER + DEFINE(FTRACE_OPS_FUNC, offsetof(struct ftrace_ops, func)); +#endif #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS DEFINE(FREGS_SIZE_ON_STACK, ALIGN(sizeof(struct __arch_ftrace_regs), STACK_ALIGN)); diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index b133c60808fe..d56fc6e9fba0 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -16,6 +16,9 @@ #ifdef CONFIG_DYNAMIC_FTRACE unsigned long ftrace_call_adjust(unsigned long addr) { + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + return addr + 8; + return addr + MCOUNT_AUIPC_SIZE; } @@ -64,9 +67,52 @@ static int __ftrace_modify_call(unsigned long source, unsigned long target, bool return 0; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS +static const struct ftrace_ops *riscv64_rec_get_ops(struct dyn_ftrace *rec) +{ + const struct ftrace_ops *ops = NULL; + + if (rec->flags & FTRACE_FL_CALL_OPS_EN) { + ops = ftrace_find_unique_ops(rec); + WARN_ON_ONCE(!ops); + } + + if (!ops) + ops = &ftrace_list_ops; + + return ops; +} + +static int ftrace_rec_set_ops(const struct dyn_ftrace 
*rec, + const struct ftrace_ops *ops) +{ + unsigned long literal = rec->ip - 8; + + return patch_text_nosync((void *)literal, &ops, sizeof(ops)); +} + +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, &ftrace_nop_ops); +} + +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, riscv64_rec_get_ops(rec)); +} +#else +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; } +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; } +#endif + int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long distance, orig_addr, pc = rec->ip - MCOUNT_AUIPC_SIZE; + int ret; + + ret = ftrace_rec_update_ops(rec); + if (ret) + return ret; orig_addr = (unsigned long)&ftrace_caller; distance = addr > orig_addr ? addr - orig_addr : orig_addr - addr; @@ -79,6 +125,11 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { u32 nop4 = RISCV_INSN_NOP4; + int ret; + + ret = ftrace_rec_set_nop_ops(rec); + if (ret) + return ret; if (patch_insn_write((void *)rec->ip, &nop4, MCOUNT_NOP4_SIZE)) return -EPERM; @@ -99,6 +150,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) unsigned int nops[2], offset; int ret; + ret = ftrace_rec_set_nop_ops(rec); + if (ret) + return ret; + offset = (unsigned long) &ftrace_caller - pc; nops[0] = to_auipc_t0(offset); nops[1] = RISCV_INSN_NOP4; @@ -113,6 +168,13 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) ftrace_func_t ftrace_call_dest = ftrace_stub; int ftrace_update_ftrace_func(ftrace_func_t func) { + /* + * When using CALL_OPS, the function to call is associated with the + * call site, and we don't have a global function pointer to update. + */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + return 0; + WRITE_ONCE(ftrace_call_dest, func); /* * The data fence ensure that the update to ftrace_call_dest happens @@ -143,8 +205,13 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, { unsigned long caller = rec->ip - MCOUNT_AUIPC_SIZE; unsigned int call[2]; + int ret; make_call_t0(caller, old_addr, call); + ret = ftrace_rec_update_ops(rec); + if (ret) + return ret; + return __ftrace_modify_call(caller, addr, true); } #endif diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 8aa554d56096..699684eea7f0 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -139,10 +139,34 @@ .macro PREPARE_ARGS addi a0, t0, -MCOUNT_JALR_SIZE // ip (callsite's jalr insn) +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + /* + * When CALL_OPS is enabled (2 or 4) nops [8B] are placed before the + * function entry, these are later overwritten with the pointer to the + * associated struct ftrace_ops. + * + * -8: &ftrace_ops of the associated tracer function. + *: + * 0: auipc t0/ra, 0x? + * 4: jalr t0/ra, ?(t0/ra) + * + * -8: &ftrace_nop_ops + *: + * 0: nop + * 4: nop + * + * t0 is set to ip+8 after the jalr is executed at the callsite, + * so we find the associated op at t0-16. 
+ */ + mv a1, ra // parent_ip + REG_L a2, -16(t0) // op + REG_L ra, FTRACE_OPS_FUNC(a2) // op->func +#else la a1, function_trace_op - REG_L a2, 0(a1) - mv a1, ra - mv a3, sp + REG_L a2, 0(a1) // op + mv a1, ra // parent_ip +#endif + mv a3, sp // regs .endm SYM_FUNC_START(ftrace_caller) @@ -150,10 +174,13 @@ SYM_FUNC_START(ftrace_caller) SAVE_ABI_REGS PREPARE_ARGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + jalr ra +#else SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) REG_L ra, ftrace_call_dest jalr ra, 0(ra) - +#endif RESTORE_ABI_REGS bnez t1, .Ldirect jr t0 From b21cdb9523e5561b97fd534dbb75d132c5c938ff Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 8 Apr 2025 02:08:35 +0800 Subject: [PATCH 44/69] riscv: ftrace: support direct call using call_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jump to FTRACE_ADDR if distance is out of reach Co-developed-by: Björn Töpel Signed-off-by: Björn Töpel Signed-off-by: Andy Chiu Link: https://lore.kernel.org/r/20250407180838.42877-11-andybnac@gmail.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- arch/riscv/include/asm/ftrace.h | 6 ++++ arch/riscv/kernel/asm-offsets.c | 3 ++ arch/riscv/kernel/ftrace.c | 13 ++++----- arch/riscv/kernel/mcount-dyn.S | 51 +++++++++++++++++++++------------ 5 files changed, 48 insertions(+), 27 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index ec986c9120e3..8fdca6345fa3 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -152,7 +152,7 @@ config RISCV select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) select FUNCTION_ALIGNMENT_4B if HAVE_DYNAMIC_FTRACE && RISCV_ISA_C - select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS if HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG) select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_GRAPH_FUNC diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 6a5c0a7fb826..22ebea3c2b26 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -130,6 +130,9 @@ struct __arch_ftrace_regs { unsigned long sp; unsigned long s0; unsigned long t1; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + unsigned long direct_tramp; +#endif union { unsigned long args[8]; struct { @@ -223,10 +226,13 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); #define ftrace_graph_func ftrace_graph_func +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) { arch_ftrace_regs(fregs)->t1 = addr; } +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + #endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index 2d96197a8abf..b26334075697 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -495,6 +495,9 @@ void asm_offsets(void) OFFSET(STACKFRAME_RA, stackframe, ra); #ifdef CONFIG_FUNCTION_TRACER DEFINE(FTRACE_OPS_FUNC, offsetof(struct ftrace_ops, func)); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call)); +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #endif 
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index d56fc6e9fba0..4c6c24380cfd 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -17,7 +17,7 @@ unsigned long ftrace_call_adjust(unsigned long addr) { if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) - return addr + 8; + return addr + 8 + MCOUNT_AUIPC_SIZE; return addr + MCOUNT_AUIPC_SIZE; } @@ -83,10 +83,9 @@ static const struct ftrace_ops *riscv64_rec_get_ops(struct dyn_ftrace *rec) return ops; } -static int ftrace_rec_set_ops(const struct dyn_ftrace *rec, - const struct ftrace_ops *ops) +static int ftrace_rec_set_ops(const struct dyn_ftrace *rec, const struct ftrace_ops *ops) { - unsigned long literal = rec->ip - 8; + unsigned long literal = ALIGN_DOWN(rec->ip - 12, 8); return patch_text_nosync((void *)literal, &ops, sizeof(ops)); } @@ -117,7 +116,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) orig_addr = (unsigned long)&ftrace_caller; distance = addr > orig_addr ? addr - orig_addr : orig_addr - addr; if (distance > JALR_RANGE) - return -EINVAL; + addr = FTRACE_ADDR; return __ftrace_modify_call(pc, addr, false); } @@ -204,15 +203,13 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { unsigned long caller = rec->ip - MCOUNT_AUIPC_SIZE; - unsigned int call[2]; int ret; - make_call_t0(caller, old_addr, call); ret = ftrace_rec_update_ops(rec); if (ret) return ret; - return __ftrace_modify_call(caller, addr, true); + return __ftrace_modify_call(caller, FTRACE_ADDR, true); } #endif diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 699684eea7f0..48f6c4f7dca0 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -82,12 +82,9 @@ * +++++++++ **/ .macro SAVE_ABI_REGS - mv t4, sp // Save original SP in T4 addi sp, sp, -FREGS_SIZE_ON_STACK - REG_S t0, FREGS_EPC(sp) REG_S x1, FREGS_RA(sp) - REG_S t4, FREGS_SP(sp) // Put original SP on stack #ifdef HAVE_FUNCTION_GRAPH_FP_TEST REG_S x8, FREGS_S0(sp) #endif @@ -108,9 +105,12 @@ REG_S x15, FREGS_A5(sp) REG_S x16, FREGS_A6(sp) REG_S x17, FREGS_A7(sp) + mv a0, sp + addi a0, a0, FREGS_SIZE_ON_STACK + REG_S a0, FREGS_SP(sp) // Put original SP on stack .endm - .macro RESTORE_ABI_REGS, all=0 + .macro RESTORE_ABI_REGS REG_L t0, FREGS_EPC(sp) REG_L x1, FREGS_RA(sp) #ifdef HAVE_FUNCTION_GRAPH_FP_TEST @@ -139,6 +139,19 @@ .macro PREPARE_ARGS addi a0, t0, -MCOUNT_JALR_SIZE // ip (callsite's jalr insn) +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + mv a1, ra // parent_ip + REG_L a2, -16(t0) // op + REG_L ra, FTRACE_OPS_FUNC(a2) // op->func +#else + la a1, function_trace_op + REG_L a2, 0(a1) // op + mv a1, ra // parent_ip +#endif + mv a3, sp // regs + .endm + +SYM_FUNC_START(ftrace_caller) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS /* * When CALL_OPS is enabled (2 or 4) nops [8B] are placed before the @@ -158,19 +171,17 @@ * t0 is set to ip+8 after the jalr is executed at the callsite, * so we find the associated op at t0-16. */ - mv a1, ra // parent_ip - REG_L a2, -16(t0) // op - REG_L ra, FTRACE_OPS_FUNC(a2) // op->func -#else - la a1, function_trace_op - REG_L a2, 0(a1) // op - mv a1, ra // parent_ip -#endif - mv a3, sp // regs - .endm + REG_L t1, -16(t0) // op Should be SZ_REG instead of 16 -SYM_FUNC_START(ftrace_caller) - mv t1, zero +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* + * If the op has a direct call, handle it immediately without + * saving/restoring registers. 
+ */ + REG_L t1, FTRACE_OPS_DIRECT_CALL(t1) + bnez t1, ftrace_caller_direct +#endif +#endif SAVE_ABI_REGS PREPARE_ARGS @@ -182,10 +193,14 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) jalr ra, 0(ra) #endif RESTORE_ABI_REGS - bnez t1, .Ldirect +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + bnez t1, ftrace_caller_direct +#endif jr t0 -.Ldirect: +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +SYM_INNER_LABEL(ftrace_caller_direct, SYM_L_LOCAL) jr t1 +#endif SYM_FUNC_END(ftrace_caller) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS From d8ac85dad407f4cea65a970e9ba42201cb49686a Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 8 Apr 2025 02:08:35 +0800 Subject: [PATCH 45/69] riscv: Documentation: add a description about dynamic ftrace Add a section in cmodx to describe how dynamic ftrace works on riscv, limitations, and assumptions. Signed-off-by: Andy Chiu Link: https://lore.kernel.org/r/20250407180838.42877-12-andybnac@gmail.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- Documentation/arch/riscv/cmodx.rst | 46 +++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst index 8c48bcff3df9..e009873b2d17 100644 --- a/Documentation/arch/riscv/cmodx.rst +++ b/Documentation/arch/riscv/cmodx.rst @@ -10,13 +10,45 @@ modified by the program itself. Instruction storage and the instruction cache program must enforce its own synchronization with the unprivileged fence.i instruction. -However, the default Linux ABI prohibits the use of fence.i in userspace -applications. At any point the scheduler may migrate a task onto a new hart. If -migration occurs after the userspace synchronized the icache and instruction -storage with fence.i, the icache on the new hart will no longer be clean. This -is due to the behavior of fence.i only affecting the hart that it is called on. -Thus, the hart that the task has been migrated to may not have synchronized -instruction storage and icache. +CMODX in the Kernel Space +------------------------- + +Dynamic ftrace +--------------------- + +Essentially, dynamic ftrace directs the control flow by inserting a function +call at each patchable function entry, and patches it dynamically at runtime to +enable or disable the redirection. In the case of RISC-V, 2 instructions, +AUIPC + JALR, are required to compose a function call. However, it is impossible +to patch 2 instructions and expect that a concurrent read-side executes them +without a race condition. This series makes atomic code patching possible in +RISC-V ftrace. Kernel preemption makes things even worse as it allows the old +state to persist across the patching process with stop_machine(). + +In order to get rid of stop_machine() and run dynamic ftrace with full kernel +preemption, we partially initialize each patchable function entry at boot-time, +setting the first instruction to AUIPC, and the second to NOP. Now, atomic +patching is possible because the kernel only has to update one instruction. +According to Ziccif, as long as an instruction is naturally aligned, the ISA +guarantees an atomic update. + +By fixing down the first instruction, AUIPC, the range of the ftrace trampoline +is limited to +-2KB from the predetermined target, ftrace_caller, due to the lack +of immediate encoding space in RISC-V. To address the issue, we introduce +CALL_OPS, where an 8B naturally aligned metadata is added in front of each +patchable function.
The metadata is resolved at the first trampoline, then the +execution can be directed to another custom trampoline. + +CMODX in the User Space +----------------------- + +Though fence.i is an unprivileged instruction, the default Linux ABI prohibits +the use of fence.i in userspace applications. At any point the scheduler may +migrate a task onto a new hart. If migration occurs after the userspace +synchronized the icache and instruction storage with fence.i, the icache on the +new hart will no longer be clean. This is due to the behavior of fence.i only +affecting the hart that it is called on. Thus, the hart that the task has been +migrated to may not have synchronized instruction storage and icache. There are two ways to solve this problem: use the riscv_flush_icache() syscall, or use the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` prctl() and emit fence.i in From 1df45f8a9fea5a7513bd1bad98604ce1fbefcaaf Mon Sep 17 00:00:00 2001 From: Song Shuai Date: Wed, 9 Apr 2025 21:29:58 +0200 Subject: [PATCH 46/69] riscv: kexec_file: Split the loading of kernel and others MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a preparatory patch for kexec_file_load Image support. It separates elf_kexec_load() into two parts: - the first part loads the vmlinux (or Image) - the second part loads other segments (e.g. initrd, fdt, purgatory) And the second part is exported as the load_extra_segments() function which would be used in both kexec-elf.c and kexec-image.c. No functional change intended. Signed-off-by: Song Shuai Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20250409193004.643839-2-bjorn@kernel.org Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kexec.h | 5 + arch/riscv/kernel/Makefile | 2 +- arch/riscv/kernel/elf_kexec.c | 485 ------------------------- arch/riscv/kernel/kexec_elf.c | 144 ++++++++ arch/riscv/kernel/machine_kexec_file.c | 360 ++++++++++++++++++ 5 files changed, 510 insertions(+), 486 deletions(-) delete mode 100644 arch/riscv/kernel/elf_kexec.c create mode 100644 arch/riscv/kernel/kexec_elf.c diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index 2b56769cb530..518825fe4160 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -67,6 +67,11 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, struct kimage; int arch_kimage_file_post_load_cleanup(struct kimage *image); #define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup + +int load_extra_segments(struct kimage *image, unsigned long kernel_start, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len); #endif #endif diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 8d186bfced45..d56305c8e631 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -107,7 +107,7 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o -obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o +obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o machine_kexec_file.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c deleted file mode 100644 index e783a72d051f..000000000000 --- a/arch/riscv/kernel/elf_kexec.c +++ /dev/null @@ -1,485 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only -/* - * Load ELF vmlinux file for the kexec_file_load syscall. - * - * Copyright (C) 2021 Huawei Technologies Co, Ltd. - * - * Author: Liao Chang (liaochang1@huawei.com) - * - * Based on kexec-tools' kexec-elf-riscv.c, heavily modified - * for kernel. - */ - -#define pr_fmt(fmt) "kexec_image: " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int arch_kimage_file_post_load_cleanup(struct kimage *image) -{ - kvfree(image->arch.fdt); - image->arch.fdt = NULL; - - vfree(image->elf_headers); - image->elf_headers = NULL; - image->elf_headers_sz = 0; - - return kexec_image_post_load_cleanup_default(image); -} - -static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, - struct kexec_elf_info *elf_info, unsigned long old_pbase, - unsigned long new_pbase) -{ - int i; - int ret = 0; - size_t size; - struct kexec_buf kbuf; - const struct elf_phdr *phdr; - - kbuf.image = image; - - for (i = 0; i < ehdr->e_phnum; i++) { - phdr = &elf_info->proghdrs[i]; - if (phdr->p_type != PT_LOAD) - continue; - - size = phdr->p_filesz; - if (size > phdr->p_memsz) - size = phdr->p_memsz; - - kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; - kbuf.bufsz = size; - kbuf.buf_align = phdr->p_align; - kbuf.mem = phdr->p_paddr - old_pbase + new_pbase; - kbuf.memsz = phdr->p_memsz; - kbuf.top_down = false; - ret = kexec_add_buffer(&kbuf); - if (ret) - break; - } - - return ret; -} - -/* - * Go through the available phsyical memory regions and find one that hold - * an image of the specified size. - */ -static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, - struct elfhdr *ehdr, struct kexec_elf_info *elf_info, - unsigned long *old_pbase, unsigned long *new_pbase) -{ - int i; - int ret; - struct kexec_buf kbuf; - const struct elf_phdr *phdr; - unsigned long lowest_paddr = ULONG_MAX; - unsigned long lowest_vaddr = ULONG_MAX; - - for (i = 0; i < ehdr->e_phnum; i++) { - phdr = &elf_info->proghdrs[i]; - if (phdr->p_type != PT_LOAD) - continue; - - if (lowest_paddr > phdr->p_paddr) - lowest_paddr = phdr->p_paddr; - - if (lowest_vaddr > phdr->p_vaddr) - lowest_vaddr = phdr->p_vaddr; - } - - kbuf.image = image; - kbuf.buf_min = lowest_paddr; - kbuf.buf_max = ULONG_MAX; - - /* - * Current riscv boot protocol requires 2MB alignment for - * RV64 and 4MB alignment for RV32 - * - */ - kbuf.buf_align = PMD_SIZE; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); - kbuf.top_down = false; - ret = arch_kexec_locate_mem_hole(&kbuf); - if (!ret) { - *old_pbase = lowest_paddr; - *new_pbase = kbuf.mem; - image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem; - } - return ret; -} - -#ifdef CONFIG_CRASH_DUMP -static int get_nr_ram_ranges_callback(struct resource *res, void *arg) -{ - unsigned int *nr_ranges = arg; - - (*nr_ranges)++; - return 0; -} - -static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) -{ - struct crash_mem *cmem = arg; - - cmem->ranges[cmem->nr_ranges].start = res->start; - cmem->ranges[cmem->nr_ranges].end = res->end; - cmem->nr_ranges++; - - return 0; -} - -static int prepare_elf_headers(void **addr, unsigned long *sz) -{ - struct crash_mem *cmem; - unsigned int nr_ranges; - int ret; - - nr_ranges = 1; /* For exclusion of crashkernel region */ - walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); - - cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); - if (!cmem) - return -ENOMEM; - - cmem->max_nr_ranges 
= nr_ranges; - cmem->nr_ranges = 0; - ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); - if (ret) - goto out; - - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (!ret) - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); - -out: - kfree(cmem); - return ret; -} - -static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, - unsigned long cmdline_len) -{ - int elfcorehdr_strlen; - char *cmdline_ptr; - - cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); - if (!cmdline_ptr) - return NULL; - - elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", - image->elf_load_addr); - - if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { - pr_err("Appending elfcorehdr= exceeds cmdline size\n"); - kfree(cmdline_ptr); - return NULL; - } - - memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); - /* Ensure it's nul terminated */ - cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; - return cmdline_ptr; -} -#endif - -static void *elf_kexec_load(struct kimage *image, char *kernel_buf, - unsigned long kernel_len, char *initrd, - unsigned long initrd_len, char *cmdline, - unsigned long cmdline_len) -{ - int ret; - void *fdt; - unsigned long old_kernel_pbase = ULONG_MAX; - unsigned long new_kernel_pbase = 0UL; - unsigned long initrd_pbase = 0UL; - unsigned long kernel_start; - struct elfhdr ehdr; - struct kexec_buf kbuf; - struct kexec_elf_info elf_info; - char *modified_cmdline = NULL; - - ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); - if (ret) - return ERR_PTR(ret); - - ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info, - &old_kernel_pbase, &new_kernel_pbase); - if (ret) - goto out; - kernel_start = image->start; - - /* Add the kernel binary to the image */ - ret = riscv_kexec_elf_load(image, &ehdr, &elf_info, - old_kernel_pbase, new_kernel_pbase); - if (ret) - goto out; - - kbuf.image = image; - kbuf.buf_min = new_kernel_pbase + kernel_len; - kbuf.buf_max = ULONG_MAX; - -#ifdef CONFIG_CRASH_DUMP - /* Add elfcorehdr */ - if (image->type == KEXEC_TYPE_CRASH) { - void *headers; - unsigned long headers_sz; - ret = prepare_elf_headers(&headers, &headers_sz); - if (ret) { - pr_err("Preparing elf core header failed\n"); - goto out; - } - - kbuf.buffer = headers; - kbuf.bufsz = headers_sz; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - kbuf.memsz = headers_sz; - kbuf.buf_align = ELF_CORE_HEADER_ALIGN; - kbuf.top_down = true; - - ret = kexec_add_buffer(&kbuf); - if (ret) { - vfree(headers); - goto out; - } - image->elf_headers = headers; - image->elf_load_addr = kbuf.mem; - image->elf_headers_sz = headers_sz; - - kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", - image->elf_load_addr, kbuf.bufsz, kbuf.memsz); - - /* Setup cmdline for kdump kernel case */ - modified_cmdline = setup_kdump_cmdline(image, cmdline, - cmdline_len); - if (!modified_cmdline) { - pr_err("Setting up cmdline for kdump kernel failed\n"); - ret = -EINVAL; - goto out; - } - cmdline = modified_cmdline; - } -#endif - -#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY - /* Add purgatory to the image */ - kbuf.top_down = true; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - ret = kexec_load_purgatory(image, &kbuf); - if (ret) { - pr_err("Error loading purgatory ret=%d\n", ret); - goto out; - } - kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem); - - ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry", - &kernel_start, - sizeof(kernel_start), 0); - if (ret) - pr_err("Error update 
purgatory ret=%d\n", ret); -#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */ - - /* Add the initrd to the image */ - if (initrd != NULL) { - kbuf.buffer = initrd; - kbuf.bufsz = kbuf.memsz = initrd_len; - kbuf.buf_align = PAGE_SIZE; - kbuf.top_down = true; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - ret = kexec_add_buffer(&kbuf); - if (ret) - goto out; - initrd_pbase = kbuf.mem; - kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase); - } - - /* Add the DTB to the image */ - fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase, - initrd_len, cmdline, 0); - if (!fdt) { - pr_err("Error setting up the new device tree.\n"); - ret = -EINVAL; - goto out; - } - - fdt_pack(fdt); - kbuf.buffer = fdt; - kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt); - kbuf.buf_align = PAGE_SIZE; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - kbuf.top_down = true; - ret = kexec_add_buffer(&kbuf); - if (ret) { - pr_err("Error add DTB kbuf ret=%d\n", ret); - goto out_free_fdt; - } - /* Cache the fdt buffer address for memory cleanup */ - image->arch.fdt = fdt; - kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem); - goto out; - -out_free_fdt: - kvfree(fdt); -out: - kfree(modified_cmdline); - kexec_free_elf_info(&elf_info); - return ret ? ERR_PTR(ret) : NULL; -} - -#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) -#define RISCV_IMM_BITS 12 -#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS) -#define RISCV_CONST_HIGH_PART(x) \ - (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1)) -#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x)) - -#define ENCODE_ITYPE_IMM(x) \ - (RV_X(x, 0, 12) << 20) -#define ENCODE_BTYPE_IMM(x) \ - ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \ - (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31)) -#define ENCODE_UTYPE_IMM(x) \ - (RV_X(x, 12, 20) << 12) -#define ENCODE_JTYPE_IMM(x) \ - ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \ - (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31)) -#define ENCODE_CBTYPE_IMM(x) \ - ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \ - (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12)) -#define ENCODE_CJTYPE_IMM(x) \ - ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \ - (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \ - (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12)) -#define ENCODE_UJTYPE_IMM(x) \ - (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \ - (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32)) -#define ENCODE_UITYPE_IMM(x) \ - (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32)) - -#define CLEAN_IMM(type, x) \ - ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x)) - -int arch_kexec_apply_relocations_add(struct purgatory_info *pi, - Elf_Shdr *section, - const Elf_Shdr *relsec, - const Elf_Shdr *symtab) -{ - const char *strtab, *name, *shstrtab; - const Elf_Shdr *sechdrs; - Elf64_Rela *relas; - int i, r_type; - - /* String & section header string table */ - sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; - strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; - shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; - - relas = (void *)pi->ehdr + relsec->sh_offset; - - for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { - const Elf_Sym *sym; /* symbol to relocate */ - unsigned long addr; /* final location after relocation */ - unsigned long val; /* relocated symbol value */ - unsigned long sec_base; /* relocated symbol value */ - void *loc; /* tmp location to modify */ - - sym = (void *)pi->ehdr + symtab->sh_offset; - sym += ELF64_R_SYM(relas[i].r_info); - - 
if (sym->st_name) - name = strtab + sym->st_name; - else - name = shstrtab + sechdrs[sym->st_shndx].sh_name; - - loc = pi->purgatory_buf; - loc += section->sh_offset; - loc += relas[i].r_offset; - - if (sym->st_shndx == SHN_ABS) - sec_base = 0; - else if (sym->st_shndx >= pi->ehdr->e_shnum) { - pr_err("Invalid section %d for symbol %s\n", - sym->st_shndx, name); - return -ENOEXEC; - } else - sec_base = pi->sechdrs[sym->st_shndx].sh_addr; - - val = sym->st_value; - val += sec_base; - val += relas[i].r_addend; - - addr = section->sh_addr + relas[i].r_offset; - - r_type = ELF64_R_TYPE(relas[i].r_info); - - switch (r_type) { - case R_RISCV_BRANCH: - *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) | - ENCODE_BTYPE_IMM(val - addr); - break; - case R_RISCV_JAL: - *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) | - ENCODE_JTYPE_IMM(val - addr); - break; - /* - * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I - * sym is expected to be next to R_RISCV_PCREL_HI20 - * in purgatory relsec. Handle it like R_RISCV_CALL - * sym, instead of searching the whole relsec. - */ - case R_RISCV_PCREL_HI20: - case R_RISCV_CALL_PLT: - case R_RISCV_CALL: - *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) | - ENCODE_UJTYPE_IMM(val - addr); - break; - case R_RISCV_RVC_BRANCH: - *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) | - ENCODE_CBTYPE_IMM(val - addr); - break; - case R_RISCV_RVC_JUMP: - *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) | - ENCODE_CJTYPE_IMM(val - addr); - break; - case R_RISCV_ADD16: - *(u16 *)loc += val; - break; - case R_RISCV_SUB16: - *(u16 *)loc -= val; - break; - case R_RISCV_ADD32: - *(u32 *)loc += val; - break; - case R_RISCV_SUB32: - *(u32 *)loc -= val; - break; - /* It has been applied by R_RISCV_PCREL_HI20 sym */ - case R_RISCV_PCREL_LO12_I: - case R_RISCV_ALIGN: - case R_RISCV_RELAX: - break; - case R_RISCV_64: - *(u64 *)loc = val; - break; - default: - pr_err("Unknown rela relocation: %d\n", r_type); - return -ENOEXEC; - } - } - return 0; -} - -const struct kexec_file_ops elf_kexec_ops = { - .probe = kexec_elf_probe, - .load = elf_kexec_load, -}; diff --git a/arch/riscv/kernel/kexec_elf.c b/arch/riscv/kernel/kexec_elf.c new file mode 100644 index 000000000000..f4755d49b89e --- /dev/null +++ b/arch/riscv/kernel/kexec_elf.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Load ELF vmlinux file for the kexec_file_load syscall. + * + * Copyright (C) 2021 Huawei Technologies Co, Ltd. + * + * Author: Liao Chang (liaochang1@huawei.com) + * + * Based on kexec-tools' kexec-elf-riscv.c, heavily modified + * for kernel. 
+ */ + +#define pr_fmt(fmt) "kexec_image: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, + struct kexec_elf_info *elf_info, unsigned long old_pbase, + unsigned long new_pbase) +{ + int i; + int ret = 0; + size_t size; + struct kexec_buf kbuf; + const struct elf_phdr *phdr; + + kbuf.image = image; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; + kbuf.bufsz = size; + kbuf.buf_align = phdr->p_align; + kbuf.mem = phdr->p_paddr - old_pbase + new_pbase; + kbuf.memsz = phdr->p_memsz; + kbuf.top_down = false; + ret = kexec_add_buffer(&kbuf); + if (ret) + break; + } + + return ret; +} + +/* + * Go through the available phsyical memory regions and find one that hold + * an image of the specified size. + */ +static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, + struct elfhdr *ehdr, struct kexec_elf_info *elf_info, + unsigned long *old_pbase, unsigned long *new_pbase) +{ + int i; + int ret; + struct kexec_buf kbuf; + const struct elf_phdr *phdr; + unsigned long lowest_paddr = ULONG_MAX; + unsigned long lowest_vaddr = ULONG_MAX; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + if (lowest_paddr > phdr->p_paddr) + lowest_paddr = phdr->p_paddr; + + if (lowest_vaddr > phdr->p_vaddr) + lowest_vaddr = phdr->p_vaddr; + } + + kbuf.image = image; + kbuf.buf_min = lowest_paddr; + kbuf.buf_max = ULONG_MAX; + + /* + * Current riscv boot protocol requires 2MB alignment for + * RV64 and 4MB alignment for RV32 + * + */ + kbuf.buf_align = PMD_SIZE; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); + kbuf.top_down = false; + ret = arch_kexec_locate_mem_hole(&kbuf); + if (!ret) { + *old_pbase = lowest_paddr; + *new_pbase = kbuf.mem; + image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem; + } + return ret; +} + +static void *elf_kexec_load(struct kimage *image, char *kernel_buf, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + int ret; + unsigned long old_kernel_pbase = ULONG_MAX; + unsigned long new_kernel_pbase = 0UL; + struct elfhdr ehdr; + struct kexec_elf_info elf_info; + + ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); + if (ret) + return ERR_PTR(ret); + + ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info, + &old_kernel_pbase, &new_kernel_pbase); + if (ret) + goto out; + + /* Add the kernel binary to the image */ + ret = riscv_kexec_elf_load(image, &ehdr, &elf_info, + old_kernel_pbase, new_kernel_pbase); + if (ret) + goto out; + + ret = load_extra_segments(image, image->start, kernel_len, + initrd, initrd_len, cmdline, cmdline_len); +out: + kexec_free_elf_info(&elf_info); + return ret ? 
ERR_PTR(ret) : NULL; +} + +const struct kexec_file_ops elf_kexec_ops = { + .probe = kexec_elf_probe, + .load = elf_kexec_load, +}; diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index b0bf8c1722c0..99bd5a5f4234 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -7,8 +7,368 @@ * Author: Liao Chang (liaochang1@huawei.com) */ #include +#include +#include +#include +#include +#include +#include +#include +#include const struct kexec_file_ops * const kexec_file_loaders[] = { &elf_kexec_ops, NULL }; + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + kvfree(image->arch.fdt); + image->arch.fdt = NULL; + + vfree(image->elf_headers); + image->elf_headers = NULL; + image->elf_headers_sz = 0; + + return kexec_image_post_load_cleanup_default(image); +} + +#ifdef CONFIG_CRASH_DUMP +static int get_nr_ram_ranges_callback(struct resource *res, void *arg) +{ + unsigned int *nr_ranges = arg; + + (*nr_ranges)++; + return 0; +} + +static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) +{ + struct crash_mem *cmem = arg; + + cmem->ranges[cmem->nr_ranges].start = res->start; + cmem->ranges[cmem->nr_ranges].end = res->end; + cmem->nr_ranges++; + + return 0; +} + +static int prepare_elf_headers(void **addr, unsigned long *sz) +{ + struct crash_mem *cmem; + unsigned int nr_ranges; + int ret; + + nr_ranges = 1; /* For exclusion of crashkernel region */ + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); + + cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); + if (!cmem) + return -ENOMEM; + + cmem->max_nr_ranges = nr_ranges; + cmem->nr_ranges = 0; + ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); + if (ret) + goto out; + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + if (!ret) + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + +out: + kfree(cmem); + return ret; +} + +static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, + unsigned long cmdline_len) +{ + int elfcorehdr_strlen; + char *cmdline_ptr; + + cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); + if (!cmdline_ptr) + return NULL; + + elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", + image->elf_load_addr); + + if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { + pr_err("Appending elfcorehdr= exceeds cmdline size\n"); + kfree(cmdline_ptr); + return NULL; + } + + memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); + /* Ensure it's nul terminated */ + cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; + return cmdline_ptr; +} +#endif + +#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) +#define RISCV_IMM_BITS 12 +#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS) +#define RISCV_CONST_HIGH_PART(x) \ + (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1)) +#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x)) + +#define ENCODE_ITYPE_IMM(x) \ + (RV_X(x, 0, 12) << 20) +#define ENCODE_BTYPE_IMM(x) \ + ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \ + (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31)) +#define ENCODE_UTYPE_IMM(x) \ + (RV_X(x, 12, 20) << 12) +#define ENCODE_JTYPE_IMM(x) \ + ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \ + (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31)) +#define ENCODE_CBTYPE_IMM(x) \ + ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \ + (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12)) 
+#define ENCODE_CJTYPE_IMM(x) \ + ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \ + (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \ + (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12)) +#define ENCODE_UJTYPE_IMM(x) \ + (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \ + (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32)) +#define ENCODE_UITYPE_IMM(x) \ + (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32)) + +#define CLEAN_IMM(type, x) \ + ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x)) + +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab) +{ + const char *strtab, *name, *shstrtab; + const Elf_Shdr *sechdrs; + Elf64_Rela *relas; + int i, r_type; + + /* String & section header string table */ + sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; + strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; + shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; + + relas = (void *)pi->ehdr + relsec->sh_offset; + + for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { + const Elf_Sym *sym; /* symbol to relocate */ + unsigned long addr; /* final location after relocation */ + unsigned long val; /* relocated symbol value */ + unsigned long sec_base; /* relocated symbol value */ + void *loc; /* tmp location to modify */ + + sym = (void *)pi->ehdr + symtab->sh_offset; + sym += ELF64_R_SYM(relas[i].r_info); + + if (sym->st_name) + name = strtab + sym->st_name; + else + name = shstrtab + sechdrs[sym->st_shndx].sh_name; + + loc = pi->purgatory_buf; + loc += section->sh_offset; + loc += relas[i].r_offset; + + if (sym->st_shndx == SHN_ABS) + sec_base = 0; + else if (sym->st_shndx >= pi->ehdr->e_shnum) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); + return -ENOEXEC; + } else + sec_base = pi->sechdrs[sym->st_shndx].sh_addr; + + val = sym->st_value; + val += sec_base; + val += relas[i].r_addend; + + addr = section->sh_addr + relas[i].r_offset; + + r_type = ELF64_R_TYPE(relas[i].r_info); + + switch (r_type) { + case R_RISCV_BRANCH: + *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) | + ENCODE_BTYPE_IMM(val - addr); + break; + case R_RISCV_JAL: + *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) | + ENCODE_JTYPE_IMM(val - addr); + break; + /* + * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I + * sym is expected to be next to R_RISCV_PCREL_HI20 + * in purgatory relsec. Handle it like R_RISCV_CALL + * sym, instead of searching the whole relsec. 
+ */ + case R_RISCV_PCREL_HI20: + case R_RISCV_CALL_PLT: + case R_RISCV_CALL: + *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) | + ENCODE_UJTYPE_IMM(val - addr); + break; + case R_RISCV_RVC_BRANCH: + *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) | + ENCODE_CBTYPE_IMM(val - addr); + break; + case R_RISCV_RVC_JUMP: + *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) | + ENCODE_CJTYPE_IMM(val - addr); + break; + case R_RISCV_ADD16: + *(u16 *)loc += val; + break; + case R_RISCV_SUB16: + *(u16 *)loc -= val; + break; + case R_RISCV_ADD32: + *(u32 *)loc += val; + break; + case R_RISCV_SUB32: + *(u32 *)loc -= val; + break; + /* It has been applied by R_RISCV_PCREL_HI20 sym */ + case R_RISCV_PCREL_LO12_I: + case R_RISCV_ALIGN: + case R_RISCV_RELAX: + break; + case R_RISCV_64: + *(u64 *)loc = val; + break; + default: + pr_err("Unknown rela relocation: %d\n", r_type); + return -ENOEXEC; + } + } + return 0; +} + + +int load_extra_segments(struct kimage *image, unsigned long kernel_start, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + int ret; + void *fdt; + unsigned long initrd_pbase = 0UL; + struct kexec_buf kbuf; + char *modified_cmdline = NULL; + + kbuf.image = image; + kbuf.buf_min = kernel_start + kernel_len; + kbuf.buf_max = ULONG_MAX; + +#ifdef CONFIG_CRASH_DUMP + /* Add elfcorehdr */ + if (image->type == KEXEC_TYPE_CRASH) { + void *headers; + unsigned long headers_sz; + ret = prepare_elf_headers(&headers, &headers_sz); + if (ret) { + pr_err("Preparing elf core header failed\n"); + goto out; + } + + kbuf.buffer = headers; + kbuf.bufsz = headers_sz; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = headers_sz; + kbuf.buf_align = ELF_CORE_HEADER_ALIGN; + kbuf.top_down = true; + + ret = kexec_add_buffer(&kbuf); + if (ret) { + vfree(headers); + goto out; + } + image->elf_headers = headers; + image->elf_load_addr = kbuf.mem; + image->elf_headers_sz = headers_sz; + + kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + image->elf_load_addr, kbuf.bufsz, kbuf.memsz); + + /* Setup cmdline for kdump kernel case */ + modified_cmdline = setup_kdump_cmdline(image, cmdline, + cmdline_len); + if (!modified_cmdline) { + pr_err("Setting up cmdline for kdump kernel failed\n"); + ret = -EINVAL; + goto out; + } + cmdline = modified_cmdline; + } +#endif + +#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY + /* Add purgatory to the image */ + kbuf.top_down = true; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_load_purgatory(image, &kbuf); + if (ret) { + pr_err("Error loading purgatory ret=%d\n", ret); + goto out; + } + kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem); + + ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry", + &kernel_start, + sizeof(kernel_start), 0); + if (ret) + pr_err("Error update purgatory ret=%d\n", ret); +#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */ + + /* Add the initrd to the image */ + if (initrd != NULL) { + kbuf.buffer = initrd; + kbuf.bufsz = kbuf.memsz = initrd_len; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = true; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + initrd_pbase = kbuf.mem; + kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase); + } + + /* Add the DTB to the image */ + fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase, + initrd_len, cmdline, 0); + if (!fdt) { + pr_err("Error setting up the new device tree.\n"); + ret = -EINVAL; + goto out; + } + + fdt_pack(fdt); + kbuf.buffer = fdt; + kbuf.bufsz = 
kbuf.memsz = fdt_totalsize(fdt); + kbuf.buf_align = PAGE_SIZE; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.top_down = true; + ret = kexec_add_buffer(&kbuf); + if (ret) { + pr_err("Error add DTB kbuf ret=%d\n", ret); + goto out_free_fdt; + } + /* Cache the fdt buffer address for memory cleanup */ + image->arch.fdt = fdt; + kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem); + goto out; + +out_free_fdt: + kvfree(fdt); +out: + kfree(modified_cmdline); + return ret; +} From 809a11eea8e8c80491e3ba3a286af25409c072d5 Mon Sep 17 00:00:00 2001 From: Song Shuai Date: Wed, 9 Apr 2025 21:29:59 +0200 Subject: [PATCH 47/69] riscv: kexec_file: Support loading Image binary file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch creates image_kexec_ops to load Image binary file for kexec_file_load() syscall. Signed-off-by: Song Shuai Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20250409193004.643839-3-bjorn@kernel.org Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/image.h | 2 + arch/riscv/include/asm/kexec.h | 1 + arch/riscv/kernel/Makefile | 2 +- arch/riscv/kernel/kexec_image.c | 96 ++++++++++++++++++++++++++ arch/riscv/kernel/machine_kexec_file.c | 1 + 5 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/kernel/kexec_image.c diff --git a/arch/riscv/include/asm/image.h b/arch/riscv/include/asm/image.h index e0b319af3681..8927a6ea1127 100644 --- a/arch/riscv/include/asm/image.h +++ b/arch/riscv/include/asm/image.h @@ -30,6 +30,8 @@ RISCV_HEADER_VERSION_MINOR) #ifndef __ASSEMBLY__ +#define riscv_image_flag_field(flags, field)\ + (((flags) >> field##_SHIFT) & field##_MASK) /** * struct riscv_image_header - riscv kernel image header * @code0: Executable code diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index 518825fe4160..b9ee8346cc8c 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -56,6 +56,7 @@ extern riscv_kexec_method riscv_kexec_norelocate; #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops elf_kexec_ops; +extern const struct kexec_file_ops image_kexec_ops; struct purgatory_info; int arch_kexec_apply_relocations_add(struct purgatory_info *pi, diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index d56305c8e631..0ead29826419 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -107,7 +107,7 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o -obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o machine_kexec_file.o +obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o kexec_image.o machine_kexec_file.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o diff --git a/arch/riscv/kernel/kexec_image.c b/arch/riscv/kernel/kexec_image.c new file mode 100644 index 000000000000..26a81774a78a --- /dev/null +++ b/arch/riscv/kernel/kexec_image.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V Kexec image loader + * + */ + +#define pr_fmt(fmt) "kexec_file(Image): " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +static int image_probe(const char *kernel_buf, unsigned long kernel_len) +{ + const struct riscv_image_header *h = (const struct riscv_image_header *)kernel_buf; + + if (!h || kernel_len < sizeof(*h)) + return -EINVAL; + + /* According to 
Documentation/riscv/boot-image-header.rst, + * use "magic2" field to check when version >= 0.2. + */ + + if (h->version >= RISCV_HEADER_VERSION && + memcmp(&h->magic2, RISCV_IMAGE_MAGIC2, sizeof(h->magic2))) + return -EINVAL; + + return 0; +} + +static void *image_load(struct kimage *image, + char *kernel, unsigned long kernel_len, + char *initrd, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len) +{ + struct riscv_image_header *h; + u64 flags; + bool be_image, be_kernel; + struct kexec_buf kbuf; + int ret; + + /* Check Image header */ + h = (struct riscv_image_header *)kernel; + if (!h->image_size) { + ret = -EINVAL; + goto out; + } + + /* Check endianness */ + flags = le64_to_cpu(h->flags); + be_image = riscv_image_flag_field(flags, RISCV_IMAGE_FLAG_BE); + be_kernel = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + if (be_image != be_kernel) { + ret = -EINVAL; + goto out; + } + + /* Load the kernel image */ + kbuf.image = image; + kbuf.buf_min = 0; + kbuf.buf_max = ULONG_MAX; + kbuf.top_down = false; + + kbuf.buffer = kernel; + kbuf.bufsz = kernel_len; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = le64_to_cpu(h->image_size); + kbuf.buf_align = le64_to_cpu(h->text_offset); + + ret = kexec_add_buffer(&kbuf); + if (ret) { + pr_err("Error add kernel image ret=%d\n", ret); + goto out; + } + + image->start = kbuf.mem; + + pr_info("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + kbuf.mem, kbuf.bufsz, kbuf.memsz); + + ret = load_extra_segments(image, kbuf.mem, kbuf.memsz, + initrd, initrd_len, cmdline, cmdline_len); + +out: + return ret ? ERR_PTR(ret) : NULL; +} + +const struct kexec_file_ops image_kexec_ops = { + .probe = image_probe, + .load = image_load, +}; diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index 99bd5a5f4234..e36104af2e24 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -18,6 +18,7 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { &elf_kexec_ops, + &image_kexec_ops, NULL }; From 850d7b14c8f77407b7d2a1edeb83d3e36c46e7a8 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Wed, 26 Mar 2025 07:34:51 +0000 Subject: [PATCH 48/69] riscv/kexec_file: Fix comment in purgatory relocator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently sec_base doesn't mean relocated symbol value, which seems a copy-pasting error in the comment. Assigned with the address of section indexed by sym->st_shndx, it should represent base address of the relevant section. Let's fix the comment to avoid possible confusion. 
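To make the distinction concrete, here is a sketch of the relevant logic
from arch_kexec_apply_relocations_add(), as shown earlier in this series
(no functional change, just the two quantities side by side):

	/* sec_base: base address of the (relocated) section holding the symbol */
	sec_base = pi->sechdrs[sym->st_shndx].sh_addr;
	/* val: the relocated symbol value, which the old comment duplicated */
	val = sym->st_value + sec_base + relas[i].r_addend;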
Fixes: 838b3e28488f ("RISC-V: Load purgatory in kexec_file") Signed-off-by: Yao Zi Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20250326073450.57648-2-ziyao@disroot.org Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/elf_kexec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index e783a72d051f..0dc5450f2c7f 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -390,7 +390,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Sym *sym; /* symbol to relocate */ unsigned long addr; /* final location after relocation */ unsigned long val; /* relocated symbol value */ - unsigned long sec_base; /* relocated symbol value */ + unsigned long sec_base; /* relocated section base address */ void *loc; /* tmp location to modify */ sym = (void *)pi->ehdr + symtab->sh_offset; From f0f4e64b9e3527c11dc6dcb005e577d661eb9ab5 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 21 Apr 2025 16:24:38 +0200 Subject: [PATCH 49/69] riscv: Introduce Zicbop instructions The S-type instructions are first introduced and then used to define the encoding of the Zicbop prefetching instructions. Co-developed-by: Guo Ren Signed-off-by: Guo Ren Tested-by: Andrea Parri Link: https://lore.kernel.org/r/20250421142441.395849-2-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/insn-def.h | 60 +++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index 71060a2f838e..02c92c1657d2 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -18,6 +18,13 @@ #define INSN_I_RD_SHIFT 7 #define INSN_I_OPCODE_SHIFT 0 +#define INSN_S_SIMM7_SHIFT 25 +#define INSN_S_RS2_SHIFT 20 +#define INSN_S_RS1_SHIFT 15 +#define INSN_S_FUNC3_SHIFT 12 +#define INSN_S_SIMM5_SHIFT 7 +#define INSN_S_OPCODE_SHIFT 0 + #ifdef __ASSEMBLY__ #ifdef CONFIG_AS_HAS_INSN @@ -30,6 +37,10 @@ .insn i \opcode, \func3, \rd, \rs1, \simm12 .endm + .macro insn_s, opcode, func3, rs2, simm12, rs1 + .insn s \opcode, \func3, \rs2, \simm12(\rs1) + .endm + #else #include @@ -51,10 +62,20 @@ (\simm12 << INSN_I_SIMM12_SHIFT)) .endm + .macro insn_s, opcode, func3, rs2, simm12, rs1 + .4byte ((\opcode << INSN_S_OPCODE_SHIFT) | \ + (\func3 << INSN_S_FUNC3_SHIFT) | \ + (.L__gpr_num_\rs2 << INSN_S_RS2_SHIFT) | \ + (.L__gpr_num_\rs1 << INSN_S_RS1_SHIFT) | \ + ((\simm12 & 0x1f) << INSN_S_SIMM5_SHIFT) | \ + (((\simm12 >> 5) & 0x7f) << INSN_S_SIMM7_SHIFT)) + .endm + #endif #define __INSN_R(...) insn_r __VA_ARGS__ #define __INSN_I(...) insn_i __VA_ARGS__ +#define __INSN_S(...) insn_s __VA_ARGS__ #else /* ! 
__ASSEMBLY__ */ @@ -66,6 +87,9 @@ #define __INSN_I(opcode, func3, rd, rs1, simm12) \ ".insn i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n" +#define __INSN_S(opcode, func3, rs2, simm12, rs1) \ + ".insn s " opcode ", " func3 ", " rs2 ", " simm12 "(" rs1 ")\n" + #else #include @@ -92,12 +116,26 @@ " (\\simm12 << " __stringify(INSN_I_SIMM12_SHIFT) "))\n" \ " .endm\n" +#define DEFINE_INSN_S \ + __DEFINE_ASM_GPR_NUMS \ +" .macro insn_s, opcode, func3, rs2, simm12, rs1\n" \ +" .4byte ((\\opcode << " __stringify(INSN_S_OPCODE_SHIFT) ") |" \ +" (\\func3 << " __stringify(INSN_S_FUNC3_SHIFT) ") |" \ +" (.L__gpr_num_\\rs2 << " __stringify(INSN_S_RS2_SHIFT) ") |" \ +" (.L__gpr_num_\\rs1 << " __stringify(INSN_S_RS1_SHIFT) ") |" \ +" ((\\simm12 & 0x1f) << " __stringify(INSN_S_SIMM5_SHIFT) ") |" \ +" (((\\simm12 >> 5) & 0x7f) << " __stringify(INSN_S_SIMM7_SHIFT) "))\n" \ +" .endm\n" + #define UNDEFINE_INSN_R \ " .purgem insn_r\n" #define UNDEFINE_INSN_I \ " .purgem insn_i\n" +#define UNDEFINE_INSN_S \ +" .purgem insn_s\n" + #define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \ DEFINE_INSN_R \ "insn_r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" \ @@ -108,6 +146,11 @@ "insn_i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n" \ UNDEFINE_INSN_I +#define __INSN_S(opcode, func3, rs2, simm12, rs1) \ + DEFINE_INSN_S \ + "insn_s " opcode ", " func3 ", " rs2 ", " simm12 ", " rs1 "\n" \ + UNDEFINE_INSN_S + #endif #endif /* ! __ASSEMBLY__ */ @@ -120,6 +163,10 @@ __INSN_I(RV_##opcode, RV_##func3, RV_##rd, \ RV_##rs1, RV_##simm12) +#define INSN_S(opcode, func3, rs2, simm12, rs1) \ + __INSN_S(RV_##opcode, RV_##func3, RV_##rs2, \ + RV_##simm12, RV_##rs1) + #define RV_OPCODE(v) __ASM_STR(v) #define RV_FUNC3(v) __ASM_STR(v) #define RV_FUNC7(v) __ASM_STR(v) @@ -133,6 +180,7 @@ #define RV___RS2(v) __RV_REG(v) #define RV_OPCODE_MISC_MEM RV_OPCODE(15) +#define RV_OPCODE_OP_IMM RV_OPCODE(19) #define RV_OPCODE_SYSTEM RV_OPCODE(115) #define HFENCE_VVMA(vaddr, asid) \ @@ -196,6 +244,18 @@ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(4)) +#define PREFETCH_I(base, offset) \ + INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(0), \ + SIMM12((offset) & 0xfe0), RS1(base)) + +#define PREFETCH_R(base, offset) \ + INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(1), \ + SIMM12((offset) & 0xfe0), RS1(base)) + +#define PREFETCH_W(base, offset) \ + INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3), \ + SIMM12((offset) & 0xfe0), RS1(base)) + #define RISCV_PAUSE ".4byte 0x100000f" #define ZAWRS_WRS_NTO ".4byte 0x00d00073" #define ZAWRS_WRS_STO ".4byte 0x01d00073" From 8d496b5a989120c1bce1ad8eb48ebae0350722d7 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 21 Apr 2025 16:24:39 +0200 Subject: [PATCH 50/69] riscv: Add support for Zicbop Zicbop introduces cache blocks prefetching instructions, add the necessary support for the kernel to use it in the coming commits. 
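For illustration, the encodings above correspond to the following
assembler mnemonics (a sketch only, assuming an assembler with Zicbop
support; in-kernel users go through the INSN_S() wrappers, so no
toolchain support is required):

	prefetch.i	0(a0)	# prefetch instruction cache block at (a0)
	prefetch.r	32(a0)	# prefetch block for read, 32 bytes further on
	prefetch.w	0(a0)	# prefetch block for write
	# the offset must be a multiple of 32, hence the (offset) & 0xfe0
	# masking in the PREFETCH_*() macro definitions above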
Co-developed-by: Guo Ren Signed-off-by: Guo Ren Tested-by: Andrea Parri Link: https://lore.kernel.org/r/20250421142441.395849-3-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 15 +++++++++++++++ arch/riscv/include/asm/barrier.h | 5 ----- arch/riscv/include/asm/cacheflush.h | 1 + arch/riscv/include/asm/hwcap.h | 1 + arch/riscv/include/asm/insn-def.h | 6 ++++++ arch/riscv/include/asm/processor.h | 1 - arch/riscv/kernel/cpufeature.c | 21 +++++++++++++++++++++ arch/riscv/mm/cacheflush.c | 14 +++++++++++--- 8 files changed, 55 insertions(+), 9 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bbec87b79309..28765ce563de 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -842,6 +842,21 @@ config RISCV_ISA_ZICBOZ If you don't know what to do here, say Y. +config RISCV_ISA_ZICBOP + bool "Zicbop extension support for cache block prefetch" + depends on MMU + depends on RISCV_ALTERNATIVE + default y + help + Adds support to dynamically detect the presence of the ZICBOP + extension (Cache Block Prefetch Operations) and enable its + usage. + + The Zicbop extension can be used to prefetch cache blocks for + read/write fetch. + + If you don't know what to do here, say Y. + config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI def_bool y # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index e1d9bf1deca6..b8c5726d86ac 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -14,11 +14,6 @@ #include #include -#define nop() __asm__ __volatile__ ("nop") -#define __nops(n) ".rept " #n "\nnop\n.endr\n" -#define nops(n) __asm__ __volatile__ (__nops(n)) - - /* These barriers need to enforce ordering on both devices or memory. 
*/ #define __mb() RISCV_FENCE(iorw, iorw) #define __rmb() RISCV_FENCE(ir, ir) diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 8de73f91bfa3..effa02c2e682 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -80,6 +80,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local); extern unsigned int riscv_cbom_block_size; extern unsigned int riscv_cboz_block_size; +extern unsigned int riscv_cbop_block_size; void riscv_init_cbo_blocksizes(void); #ifdef CONFIG_RISCV_DMA_NONCOHERENT diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index e3cbf203cdde..affd63e11b0a 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -105,6 +105,7 @@ #define RISCV_ISA_EXT_ZVFBFWMA 96 #define RISCV_ISA_EXT_ZAAMO 97 #define RISCV_ISA_EXT_ZALRSC 98 +#define RISCV_ISA_EXT_ZICBOP 99 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index 02c92c1657d2..d5adbaec1d01 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -263,4 +263,10 @@ #define RISCV_INSN_NOP4 _AC(0x00000013, U) +#ifndef __ASSEMBLY__ +#define nop() __asm__ __volatile__ ("nop") +#define __nops(n) ".rept " #n "\nnop\n.endr\n" +#define nops(n) __asm__ __volatile__ (__nops(n)) +#endif + #endif /* __ASM_INSN_DEF_H */ diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 5f56eb9d114a..09d4c963399a 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -52,7 +52,6 @@ #endif #ifndef __ASSEMBLY__ -#include struct task_struct; struct pt_regs; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 2054f6c4b0ae..743d53415572 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -32,6 +32,7 @@ #define NUM_ALPHA_EXTS ('z' - 'a' + 1) static bool any_cpu_has_zicboz; +static bool any_cpu_has_zicbop; static bool any_cpu_has_zicbom; unsigned long elf_hwcap __read_mostly; @@ -119,6 +120,21 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data, return 0; } +static int riscv_ext_zicbop_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!riscv_cbop_block_size) { + pr_err("Zicbop detected in ISA string, disabling as no cbop-block-size found\n"); + return -EINVAL; + } + if (!is_power_of_2(riscv_cbop_block_size)) { + pr_err("Zicbop disabled as cbop-block-size present, but is not a power-of-2\n"); + return -EINVAL; + } + any_cpu_has_zicbop = true; + return 0; +} + static int riscv_ext_f_validate(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap) { @@ -442,6 +458,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_SUPERSET_VALIDATE(v, RISCV_ISA_EXT_v, riscv_v_exts, riscv_ext_vector_float_validate), __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, riscv_ext_zicbom_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zicbop, RISCV_ISA_EXT_ZICBOP, riscv_ext_zicbop_validate), __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, riscv_ext_zicboz_validate), __RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE), __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), @@ -1112,6 +1129,10 @@ void __init riscv_user_isa_enable(void) current->thread.envcfg |= ENVCFG_CBCFE; else if (any_cpu_has_zicbom) 
pr_warn("Zicbom disabled as it is unavailable on some harts\n"); + + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOP) && + any_cpu_has_zicbop) + pr_warn("Zicbop disabled as it is unavailable on some harts\n"); } #ifdef CONFIG_RISCV_ALTERNATIVE diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index b81672729887..6265052ef8b6 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -101,6 +101,9 @@ EXPORT_SYMBOL_GPL(riscv_cbom_block_size); unsigned int riscv_cboz_block_size; EXPORT_SYMBOL_GPL(riscv_cboz_block_size); +unsigned int riscv_cbop_block_size; +EXPORT_SYMBOL_GPL(riscv_cbop_block_size); + static void __init cbo_get_block_size(struct device_node *node, const char *name, u32 *block_size, unsigned long *first_hartid) @@ -125,8 +128,8 @@ static void __init cbo_get_block_size(struct device_node *node, void __init riscv_init_cbo_blocksizes(void) { - unsigned long cbom_hartid, cboz_hartid; - u32 cbom_block_size = 0, cboz_block_size = 0; + unsigned long cbom_hartid, cboz_hartid, cbop_hartid; + u32 cbom_block_size = 0, cboz_block_size = 0, cbop_block_size = 0; struct device_node *node; struct acpi_table_header *rhct; acpi_status status; @@ -138,13 +141,15 @@ void __init riscv_init_cbo_blocksizes(void) &cbom_block_size, &cbom_hartid); cbo_get_block_size(node, "riscv,cboz-block-size", &cboz_block_size, &cboz_hartid); + cbo_get_block_size(node, "riscv,cbop-block-size", + &cbop_block_size, &cbop_hartid); } } else { status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct); if (ACPI_FAILURE(status)) return; - acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, NULL); + acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, &cbop_block_size); acpi_put_table((struct acpi_table_header *)rhct); } @@ -153,6 +158,9 @@ void __init riscv_init_cbo_blocksizes(void) if (cboz_block_size) riscv_cboz_block_size = cboz_block_size; + + if (cbop_block_size) + riscv_cbop_block_size = cbop_block_size; } #ifdef CONFIG_SMP From a5f947c73115efb6fb0d9579e71ce1ee5cf706aa Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Mon, 21 Apr 2025 16:24:40 +0200 Subject: [PATCH 51/69] riscv: Add ARCH_HAS_PREFETCH[W] support with Zicbop Enable Linux prefetch and prefetchw primitives using Zicbop. 
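As a usage sketch (a hypothetical caller; process() and the list layout
are illustrative, not part of this patch), the hints patch down to a nop
on harts without Zicbop thanks to the ALTERNATIVE() wrapping:

	/* hint the next node into cache while working on the current one */
	for (node = head; node; node = node->next) {
		prefetch(node->next);	/* becomes a nop without Zicbop */
		process(node);
	}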
Signed-off-by: Guo Ren Signed-off-by: Guo Ren Link: https://lore.kernel.org/r/20231231082955.16516-3-guoren@kernel.org Tested-by: Andrea Parri Link: https://lore.kernel.org/r/20250421142441.395849-4-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/processor.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 09d4c963399a..39dfab495a4c 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -13,6 +13,9 @@ #include #include +#include +#include +#include #define arch_get_mmap_end(addr, len, flags) \ ({ \ @@ -135,6 +138,27 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, #define KSTK_EIP(tsk) (task_pt_regs(tsk)->epc) #define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp) +#define PREFETCH_ASM(x) \ + ALTERNATIVE(__nops(1), PREFETCH_R(x, 0), 0, \ + RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP) + +#define PREFETCHW_ASM(x) \ + ALTERNATIVE(__nops(1), PREFETCH_W(x, 0), 0, \ + RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP) + +#ifdef CONFIG_RISCV_ISA_ZICBOP +#define ARCH_HAS_PREFETCH +static inline void prefetch(const void *x) +{ + __asm__ __volatile__(PREFETCH_ASM(%0) : : "r" (x) : "memory"); +} + +#define ARCH_HAS_PREFETCHW +static inline void prefetchw(const void *x) +{ + __asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory"); +} +#endif /* CONFIG_RISCV_ISA_ZICBOP */ /* Do necessary setup to start up a newly executed thread. */ extern void start_thread(struct pt_regs *regs, From eb87e56d651d0a72009842bc0d8eeae7b605c97d Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Mon, 21 Apr 2025 16:24:41 +0200 Subject: [PATCH 52/69] riscv: xchg: Prefetch the destination word for sc.w The cost of changing a cacheline from shared to exclusive state can be significant, especially when this is triggered by an exclusive store, since it may result in having to retry the transaction. This patch makes use of prefetch.w to prefetch cachelines for write prior to lr/sc loops when using the xchg_small atomic routine. This patch is inspired by commit 0ea366f5e1b6 ("arm64: atomics: prefetch the destination word for write prior to stxr"). 
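The resulting sequence is roughly the following (hand-simplified from the
__arch_xchg_masked() change below; t2 holds ~mask, t3 the new value
shifted into place, and the prefetch.w is patched to a nop without
Zicbop):

	prefetch.w	0(a0)		# bring the line in for write up front
0:	lr.w	t0, (a0)		# load-reserved
	and	t1, t0, t2		# keep the bytes outside the field
	or	t1, t1, t3		# insert the new value
	sc.w	t1, t1, (a0)		# store-conditional, less likely to fail
	bnez	t1, 0b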
Signed-off-by: Guo Ren Signed-off-by: Guo Ren Link: https://lore.kernel.org/r/20231231082955.16516-4-guoren@kernel.org Tested-by: Andrea Parri Link: https://lore.kernel.org/r/20250421142441.395849-5-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cmpxchg.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 2ec119eb147b..0b749e710216 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -13,6 +13,7 @@ #include #include #include +#include #define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \ swap_append, r, p, n) \ @@ -37,6 +38,7 @@ \ __asm__ __volatile__ ( \ prepend \ + PREFETCHW_ASM(%5) \ "0: lr.w %0, %2\n" \ " and %1, %0, %z4\n" \ " or %1, %1, %z3\n" \ @@ -44,7 +46,7 @@ " bnez %1, 0b\n" \ sc_append \ : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ - : "rJ" (__newx), "rJ" (~__mask) \ + : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \ : "memory"); \ \ r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ From c3cc2a4a3a23faf6b88459471c1c16ab3837cc2f Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 21 Mar 2025 13:39:54 +0100 Subject: [PATCH 53/69] riscv: Add support for PUD THP Add the necessary page table functions to deal with PUD THP, this enables the use of PUD pfnmap. Link: https://lore.kernel.org/r/20250321123954.225097-1-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/pgtable-64.h | 5 +- arch/riscv/include/asm/pgtable.h | 97 +++++++++++++++++++++++++++++ arch/riscv/include/asm/tlbflush.h | 2 + arch/riscv/mm/pgtable.c | 10 +++ arch/riscv/mm/tlbflush.c | 7 +++ 6 files changed, 120 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bbec87b79309..63ef4aa03506 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -143,6 +143,7 @@ config RISCV select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU + select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if 64BIT && MMU select HAVE_ARCH_USERFAULTFD_MINOR if 64BIT && USERFAULTFD select HAVE_ARCH_VMAP_STACK if MMU && 64BIT select HAVE_ASM_MODVERSIONS diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 0897dd99ab8d..a2c00235c447 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -184,7 +184,7 @@ static inline int pud_none(pud_t pud) static inline int pud_bad(pud_t pud) { - return !pud_present(pud); + return !pud_present(pud) || (pud_val(pud) & _PAGE_LEAF); } #define pud_leaf pud_leaf @@ -401,6 +401,7 @@ p4d_t *p4d_offset(pgd_t *pgd, unsigned long address); #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline int pte_devmap(pte_t pte); static inline pte_t pmd_pte(pmd_t pmd); +static inline pte_t pud_pte(pud_t pud); static inline int pmd_devmap(pmd_t pmd) { @@ -409,7 +410,7 @@ static inline int pmd_devmap(pmd_t pmd) static inline int pud_devmap(pud_t pud) { - return 0; + return pte_devmap(pud_pte(pud)); } static inline int pgd_devmap(pgd_t pgd) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 428e48e5f57d..b84e2ff83cb7 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -902,6 +902,103 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define pmdp_collapse_flush pmdp_collapse_flush 
extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); + +static inline pud_t pud_wrprotect(pud_t pud) +{ + return pte_pud(pte_wrprotect(pud_pte(pud))); +} + +static inline int pud_trans_huge(pud_t pud) +{ + return pud_leaf(pud); +} + +static inline int pud_dirty(pud_t pud) +{ + return pte_dirty(pud_pte(pud)); +} + +static inline pud_t pud_mkyoung(pud_t pud) +{ + return pte_pud(pte_mkyoung(pud_pte(pud))); +} + +static inline pud_t pud_mkold(pud_t pud) +{ + return pte_pud(pte_mkold(pud_pte(pud))); +} + +static inline pud_t pud_mkdirty(pud_t pud) +{ + return pte_pud(pte_mkdirty(pud_pte(pud))); +} + +static inline pud_t pud_mkclean(pud_t pud) +{ + return pte_pud(pte_mkclean(pud_pte(pud))); +} + +static inline pud_t pud_mkwrite(pud_t pud) +{ + return pte_pud(pte_mkwrite_novma(pud_pte(pud))); +} + +static inline pud_t pud_mkhuge(pud_t pud) +{ + return pud; +} + +static inline pud_t pud_mkdevmap(pud_t pud) +{ + return pte_pud(pte_mkdevmap(pud_pte(pud))); +} + +static inline int pudp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pud_t *pudp, + pud_t entry, int dirty) +{ + return ptep_set_access_flags(vma, address, (pte_t *)pudp, pud_pte(entry), dirty); +} + +static inline int pudp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, pud_t *pudp) +{ + return ptep_test_and_clear_young(vma, address, (pte_t *)pudp); +} + +static inline int pud_young(pud_t pud) +{ + return pte_young(pud_pte(pud)); +} + +static inline void update_mmu_cache_pud(struct vm_area_struct *vma, + unsigned long address, pud_t *pudp) +{ + pte_t *ptep = (pte_t *)pudp; + + update_mmu_cache(vma, address, ptep); +} + +static inline pud_t pudp_establish(struct vm_area_struct *vma, + unsigned long address, pud_t *pudp, pud_t pud) +{ + page_table_check_pud_set(vma->vm_mm, pudp, pud); + return __pud(atomic_long_xchg((atomic_long_t *)pudp, pud_val(pud))); +} + +static inline pud_t pud_mkinvalid(pud_t pud) +{ + return __pud(pud_val(pud) & ~(_PAGE_PRESENT | _PAGE_PROT_NONE)); +} + +extern pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address, + pud_t *pudp); + +static inline pud_t pud_modify(pud_t pud, pgprot_t newprot) +{ + return pte_pud(pte_modify(pud_pte(pud), newprot)); +} + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index ce0dd0fed764..1a20dd746a49 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -56,6 +56,8 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +void flush_pud_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); #endif bool arch_tlbbatch_should_defer(struct mm_struct *mm); diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c index 4ae67324f992..8b6c0a112a8d 100644 --- a/arch/riscv/mm/pgtable.c +++ b/arch/riscv/mm/pgtable.c @@ -154,4 +154,14 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, flush_tlb_mm(vma->vm_mm); return pmd; } + +pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address, + pud_t *pudp) +{ + VM_WARN_ON_ONCE(!pud_present(*pudp)); + pud_t old = pudp_establish(vma, address, pudp, pud_mkinvalid(*pudp)); + + flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE); + return old; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/riscv/mm/tlbflush.c 
b/arch/riscv/mm/tlbflush.c index f9e27ba1df99..97c8fde3cbfe 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -182,6 +182,13 @@ void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, __flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm), start, end - start, PMD_SIZE); } + +void flush_pud_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + __flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm), + start, end - start, PUD_SIZE); +} #endif bool arch_tlbbatch_should_defer(struct mm_struct *mm) From be17c0df67959fe4f88dac75dc26ed9252d4b133 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 9 Apr 2025 10:14:51 -0700 Subject: [PATCH 54/69] riscv: module: Optimize PLT/GOT entry counting perf reports that 99.63% of the cycles from `modprobe amdgpu` are spent inside module_frob_arch_sections(). This is because amdgpu.ko contains about 300000 relocations in its .rela.text section, and the algorithm in count_max_entries() takes quadratic time. Apply two optimizations from the arm64 code, which together reduce the total execution time by 99.58%. First, sort the relocations so duplicate entries are adjacent. Second, reduce the number of relocations that must be sorted by filtering to only relocations that need PLT/GOT entries, as done in commit d4e0340919fb ("arm64/module: Optimize module load time by optimizing PLT counting"). Unlike the arm64 code, here the filtering and sorting is done in a scratch buffer, because the HI20 relocation search optimization in apply_relocate_add() depends on the original order of the relocations. This allows accumulating PLT/GOT relocations across sections so sorting and counting is only done once per module. Signed-off-by: Samuel Holland Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20250409171526.862481-3-samuel.holland@sifive.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module-sections.c | 81 +++++++++++++++++++++++------ 1 file changed, 65 insertions(+), 16 deletions(-) diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c index 91d0b355ceef..75551ac6504c 100644 --- a/arch/riscv/kernel/module-sections.c +++ b/arch/riscv/kernel/module-sections.c @@ -9,6 +9,7 @@ #include #include #include +#include unsigned long module_emit_got_entry(struct module *mod, unsigned long val) { @@ -55,44 +56,70 @@ unsigned long module_emit_plt_entry(struct module *mod, unsigned long val) return (unsigned long)&plt[i]; } -static int is_rela_equal(const Elf_Rela *x, const Elf_Rela *y) +#define cmp_3way(a, b) ((a) < (b) ? -1 : (a) > (b)) + +static int cmp_rela(const void *a, const void *b) { - return x->r_info == y->r_info && x->r_addend == y->r_addend; + const Elf_Rela *x = a, *y = b; + int i; + + /* sort by type, symbol index and addend */ + i = cmp_3way(x->r_info, y->r_info); + if (i == 0) + i = cmp_3way(x->r_addend, y->r_addend); + return i; } static bool duplicate_rela(const Elf_Rela *rela, int idx) { - int i; - for (i = 0; i < idx; i++) { - if (is_rela_equal(&rela[i], &rela[idx])) - return true; - } - return false; + /* + * Entries are sorted by type, symbol index and addend. That means + * that, if a duplicate entry exists, it must be in the preceding slot. 
+ */ + return idx > 0 && cmp_rela(rela + idx, rela + idx - 1) == 0; } -static void count_max_entries(Elf_Rela *relas, int num, +static void count_max_entries(const Elf_Rela *relas, size_t num, unsigned int *plts, unsigned int *gots) { - for (int i = 0; i < num; i++) { + for (size_t i = 0; i < num; i++) { + if (duplicate_rela(relas, i)) + continue; + switch (ELF_R_TYPE(relas[i].r_info)) { case R_RISCV_CALL_PLT: case R_RISCV_PLT32: - if (!duplicate_rela(relas, i)) - (*plts)++; + (*plts)++; break; case R_RISCV_GOT_HI20: - if (!duplicate_rela(relas, i)) - (*gots)++; + (*gots)++; break; + default: + unreachable(); } } } +static bool rela_needs_plt_got_entry(const Elf_Rela *rela) +{ + switch (ELF_R_TYPE(rela->r_info)) { + case R_RISCV_CALL_PLT: + case R_RISCV_GOT_HI20: + case R_RISCV_PLT32: + return true; + default: + return false; + } +} + int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { + size_t num_scratch_relas = 0; unsigned int num_plts = 0; unsigned int num_gots = 0; + Elf_Rela *scratch = NULL; + size_t scratch_size = 0; int i; /* @@ -122,9 +149,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, /* Calculate the maxinum number of entries */ for (i = 0; i < ehdr->e_shnum; i++) { + size_t num_relas = sechdrs[i].sh_size / sizeof(Elf_Rela); Elf_Rela *relas = (void *)ehdr + sechdrs[i].sh_offset; - int num_rela = sechdrs[i].sh_size / sizeof(Elf_Rela); Elf_Shdr *dst_sec = sechdrs + sechdrs[i].sh_info; + size_t scratch_size_needed; if (sechdrs[i].sh_type != SHT_RELA) continue; @@ -133,7 +161,28 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, if (!(dst_sec->sh_flags & SHF_EXECINSTR)) continue; - count_max_entries(relas, num_rela, &num_plts, &num_gots); + /* + * apply_relocate_add() relies on HI20 and LO12 relocation pairs being + * close together, so sort a copy of the section to avoid interfering. + */ + scratch_size_needed = (num_scratch_relas + num_relas) * sizeof(*scratch); + if (scratch_size_needed > scratch_size) { + scratch_size = scratch_size_needed; + scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL); + if (!scratch) + return -ENOMEM; + } + + for (size_t j = 0; j < num_relas; j++) + if (rela_needs_plt_got_entry(&relas[j])) + scratch[num_scratch_relas++] = relas[j]; + } + + if (scratch) { + /* sort the accumulated PLT/GOT relocations so duplicates are adjacent */ + sort(scratch, num_scratch_relas, sizeof(*scratch), cmp_rela, NULL); + count_max_entries(scratch, num_scratch_relas, &num_plts, &num_gots); + kvfree(scratch); } mod->arch.plt.shdr->sh_type = SHT_NOBITS; From 48d9aabf2dc5d93b402ce55e3187e95698f2b9e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=A2=E8=87=B4=E9=82=A6=20=28XIE=20Zhibang=29?= Date: Fri, 28 Mar 2025 10:14:22 +0000 Subject: [PATCH 55/69] RISC-V: Kconfig: Fix help text of CMDLINE_EXTEND MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is the built-in command line appended to the bootloader command line, not the bootloader command line appended to the built-in command line. 
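For example (illustrative values): with CONFIG_CMDLINE="console=ttyS0"
built in and a bootloader passing "root=/dev/vda ro", CMDLINE_EXTEND
makes the kernel see "root=/dev/vda ro console=ttyS0", not the reverse.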
Fixes: 3aed8c43267e ("RISC-V: Update Kconfig to better handle CMDLINE")
Signed-off-by: 谢致邦 (XIE Zhibang)
Link: https://lore.kernel.org/r/tencent_A93C7FB46BFD20054AD2FEF4645913FF550A@qq.com
Signed-off-by: Alexandre Ghiti
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 674cf6ff7188..78640cd353fd 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -1176,8 +1176,8 @@ config CMDLINE_FALLBACK
 config CMDLINE_EXTEND
 	bool "Extend bootloader kernel arguments"
 	help
-	  The command-line arguments provided during boot will be
-	  appended to the built-in command line. This is useful in
+	  The built-in command line will be appended to the command-
+	  line arguments provided during boot. This is useful in
 	  cases where the provided arguments are insufficient and
 	  you don't want to or cannot modify them.

From 4d6319289e8661a1845dde5d05859afc21ec3ed9 Mon Sep 17 00:00:00 2001
From: Haibo Xu
Date: Wed, 9 Apr 2025 10:51:56 +0800
Subject: [PATCH 56/69] perf symbols: Ignore mapping symbols on riscv

RISC-V ELF files use mapping symbols with the special names $x and $d
to identify regions of RISC-V code or code with different ISAs[1].
These symbols don't identify functions, so they would confuse the perf
output. The patch filters out these symbols at load time, similar to
"4886f2ca perf symbols: Ignore mapping symbols on aarch64".

[1] https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#mapping-symbol

Signed-off-by: Haibo Xu
Acked-by: Namhyung Kim
Link: https://lore.kernel.org/r/20250409025202.201046-1-haibo1.xu@intel.com
Signed-off-by: Alexandre Ghiti
Signed-off-by: Palmer Dabbelt
---
 tools/perf/util/symbol-elf.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index fbf6d0f73af9..55b1409b0593 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1733,6 +1733,12 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 			continue;
 		}

+		/* Reject RISCV ELF "mapping symbols" */
+		if (ehdr.e_machine == EM_RISCV) {
+			if (elf_name[0] == '$' && strchr("dx", elf_name[1]))
+				continue;
+		}
+
 		if (runtime_ss->opdsec && sym.st_shndx == runtime_ss->opdidx) {
 			u32 offset = sym.st_value - syms_ss->opdshdr.sh_addr;
 			u64 *opd = opddata->d_buf + offset;

From d7e0cce103663a60b14c52a0bcad62c7e8c0e6e8 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Tue, 22 Apr 2025 19:31:56 +0800
Subject: [PATCH 57/69] riscv: Make regs_irqs_disabled() more clear

The return value of regs_irqs_disabled() is true or false, so change
its type to reflect that and also make it always inline.
Suggested-by: Huacai Chen Signed-off-by: Tiezhu Yang Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250422113156.25742-1-yangtiezhu@loongson.cn Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index 2910231977cb..a7dc0e330757 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -175,7 +175,7 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, return 0; } -static inline int regs_irqs_disabled(struct pt_regs *regs) +static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) { return !(regs->status & SR_PIE); } From 415a8c81da3dab0a585bd4f8d505a11ad5a171a7 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 21 Apr 2025 16:14:13 +0200 Subject: [PATCH 58/69] riscv: hwprobe: export Zabha extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export Zabha through the hwprobe syscall. Reviewed-by: Clément Léger Link: https://lore.kernel.org/r/20250421141413.394444-1-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- Documentation/arch/riscv/hwprobe.rst | 4 ++++ arch/riscv/include/uapi/asm/hwprobe.h | 1 + arch/riscv/kernel/sys_hwprobe.c | 1 + 3 files changed, 6 insertions(+) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index f60bf5991755..a4998ad2dfe0 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -271,6 +271,10 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_EXT_ZICBOM`: The Zicbom extension is supported, as ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs. + * :c:macro:`RISCV_HWPROBE_EXT_ZABHA`: The Zabha extension is supported as + ratified in commit 49f49c842ff9 ("Update to Rafified state") of + riscv-zabha. + * :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`, but the key was mistakenly classified as a bitmask rather than a value. diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 3c2fce939673..fca15f2bf6f3 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -81,6 +81,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZICBOM (1ULL << 55) #define RISCV_HWPROBE_EXT_ZAAMO (1ULL << 56) #define RISCV_HWPROBE_EXT_ZALRSC (1ULL << 57) +#define RISCV_HWPROBE_EXT_ZABHA (1ULL << 58) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 249aec8594a9..ed3123396a96 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -96,6 +96,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, * presence in the hart_isa bitmap, are made. 
*/ EXT_KEY(ZAAMO); + EXT_KEY(ZABHA); EXT_KEY(ZACAS); EXT_KEY(ZALRSC); EXT_KEY(ZAWRS); From a4348546332c9fae12b29acb514535e0a52b9b3c Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 2 Jun 2025 21:39:14 +0200 Subject: [PATCH 59/69] riscv: make unsafe user copy routines use existing assembly routines The current implementation is underperforming and in addition, it triggers misaligned access traps on platforms which do not handle misaligned accesses in hardware. Use the existing assembly routines to solve both problems at once. Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250602193918.868962-2-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/asm-prototypes.h | 2 +- arch/riscv/include/asm/uaccess.h | 33 ++++------------ arch/riscv/lib/riscv_v_helpers.c | 11 ++++-- arch/riscv/lib/uaccess.S | 50 +++++++++++++++++-------- arch/riscv/lib/uaccess_vector.S | 15 ++++++-- 5 files changed, 63 insertions(+), 48 deletions(-) diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index cd627ec289f1..5d10edde6d17 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -12,7 +12,7 @@ long long __ashlti3(long long a, int b); #ifdef CONFIG_RISCV_ISA_V #ifdef CONFIG_MMU -asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n); +asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n, bool enable_sum); #endif /* CONFIG_MMU */ void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1, diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 87d01168f80a..046de7ced09c 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -450,35 +450,18 @@ static inline void user_access_restore(unsigned long enabled) { } (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) -#define unsafe_copy_loop(dst, src, len, type, op, label) \ - while (len >= sizeof(type)) { \ - op(*(type *)(src), (type __user *)(dst), label); \ - dst += sizeof(type); \ - src += sizeof(type); \ - len -= sizeof(type); \ - } +unsigned long __must_check __asm_copy_to_user_sum_enabled(void __user *to, + const void *from, unsigned long n); +unsigned long __must_check __asm_copy_from_user_sum_enabled(void *to, + const void __user *from, unsigned long n); #define unsafe_copy_to_user(_dst, _src, _len, label) \ -do { \ - char __user *__ucu_dst = (_dst); \ - const char *__ucu_src = (_src); \ - size_t __ucu_len = (_len); \ - unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, unsafe_put_user, label); \ - unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, unsafe_put_user, label); \ - unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, unsafe_put_user, label); \ - unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, unsafe_put_user, label); \ -} while (0) + if (__asm_copy_to_user_sum_enabled(_dst, _src, _len)) \ + goto label; #define unsafe_copy_from_user(_dst, _src, _len, label) \ -do { \ - char *__ucu_dst = (_dst); \ - const char __user *__ucu_src = (_src); \ - size_t __ucu_len = (_len); \ - unsafe_copy_loop(__ucu_src, __ucu_dst, __ucu_len, u64, unsafe_get_user, label); \ - unsafe_copy_loop(__ucu_src, __ucu_dst, __ucu_len, u32, unsafe_get_user, label); \ - unsafe_copy_loop(__ucu_src, __ucu_dst, __ucu_len, u16, unsafe_get_user, label); \ - unsafe_copy_loop(__ucu_src, __ucu_dst, __ucu_len, u8, unsafe_get_user, label); \ -} while (0) + if (__asm_copy_from_user_sum_enabled(_dst, _src, _len)) \ + goto label; #else /* 
CONFIG_MMU */ #include diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c index be38a93cedae..7bbdfc6d4552 100644 --- a/arch/riscv/lib/riscv_v_helpers.c +++ b/arch/riscv/lib/riscv_v_helpers.c @@ -16,8 +16,11 @@ #ifdef CONFIG_MMU size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD; int __asm_vector_usercopy(void *dst, void *src, size_t n); +int __asm_vector_usercopy_sum_enabled(void *dst, void *src, size_t n); int fallback_scalar_usercopy(void *dst, void *src, size_t n); -asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n) +int fallback_scalar_usercopy_sum_enabled(void *dst, void *src, size_t n); +asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n, + bool enable_sum) { size_t remain, copied; @@ -26,7 +29,8 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n) goto fallback; kernel_vector_begin(); - remain = __asm_vector_usercopy(dst, src, n); + remain = enable_sum ? __asm_vector_usercopy(dst, src, n) : + __asm_vector_usercopy_sum_enabled(dst, src, n); kernel_vector_end(); if (remain) { @@ -40,6 +44,7 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n) return remain; fallback: - return fallback_scalar_usercopy(dst, src, n); + return enable_sum ? fallback_scalar_usercopy(dst, src, n) : + fallback_scalar_usercopy_sum_enabled(dst, src, n); } #endif diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 6a9f116bb545..4efea1b3326c 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -17,14 +17,43 @@ SYM_FUNC_START(__asm_copy_to_user) ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_ZVE32X, CONFIG_RISCV_ISA_V) REG_L t0, riscv_v_usercopy_threshold bltu a2, t0, fallback_scalar_usercopy - tail enter_vector_usercopy + li a3, 1 + tail enter_vector_usercopy #endif +SYM_FUNC_END(__asm_copy_to_user) +EXPORT_SYMBOL(__asm_copy_to_user) +SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user) +EXPORT_SYMBOL(__asm_copy_from_user) + SYM_FUNC_START(fallback_scalar_usercopy) - /* Enable access to user memory */ - li t6, SR_SUM - csrs CSR_STATUS, t6 + li t6, SR_SUM + csrs CSR_STATUS, t6 + mv t6, ra + call fallback_scalar_usercopy_sum_enabled + + /* Disable access to user memory */ + mv ra, t6 + li t6, SR_SUM + csrc CSR_STATUS, t6 + ret +SYM_FUNC_END(fallback_scalar_usercopy) + +SYM_FUNC_START(__asm_copy_to_user_sum_enabled) +#ifdef CONFIG_RISCV_ISA_V + ALTERNATIVE("j fallback_scalar_usercopy_sum_enabled", "nop", 0, RISCV_ISA_EXT_ZVE32X, CONFIG_RISCV_ISA_V) + REG_L t0, riscv_v_usercopy_threshold + bltu a2, t0, fallback_scalar_usercopy_sum_enabled + li a3, 0 + tail enter_vector_usercopy +#endif +SYM_FUNC_END(__asm_copy_to_user_sum_enabled) +SYM_FUNC_ALIAS(__asm_copy_from_user_sum_enabled, __asm_copy_to_user_sum_enabled) +EXPORT_SYMBOL(__asm_copy_from_user_sum_enabled) +EXPORT_SYMBOL(__asm_copy_to_user_sum_enabled) + +SYM_FUNC_START(fallback_scalar_usercopy_sum_enabled) /* * Save the terminal address which will be used to compute the number * of bytes copied in case of a fixup exception. 
@@ -178,23 +207,12 @@ SYM_FUNC_START(fallback_scalar_usercopy) bltu a0, t0, 4b /* t0 - end of dst */ .Lout_copy_user: - /* Disable access to user memory */ - csrc CSR_STATUS, t6 li a0, 0 ret - - /* Exception fixup code */ 10: - /* Disable access to user memory */ - csrc CSR_STATUS, t6 sub a0, t5, a0 ret -SYM_FUNC_END(__asm_copy_to_user) -SYM_FUNC_END(fallback_scalar_usercopy) -EXPORT_SYMBOL(__asm_copy_to_user) -SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user) -EXPORT_SYMBOL(__asm_copy_from_user) - +SYM_FUNC_END(fallback_scalar_usercopy_sum_enabled) SYM_FUNC_START(__clear_user) diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S index 7c45f26de4f7..03b5560609a2 100644 --- a/arch/riscv/lib/uaccess_vector.S +++ b/arch/riscv/lib/uaccess_vector.S @@ -24,7 +24,18 @@ SYM_FUNC_START(__asm_vector_usercopy) /* Enable access to user memory */ li t6, SR_SUM csrs CSR_STATUS, t6 + mv t6, ra + call __asm_vector_usercopy_sum_enabled + + /* Disable access to user memory */ + mv ra, t6 + li t6, SR_SUM + csrc CSR_STATUS, t6 + ret +SYM_FUNC_END(__asm_vector_usercopy) + +SYM_FUNC_START(__asm_vector_usercopy_sum_enabled) loop: vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma fixup vle8.v vData, (pSrc), 10f @@ -36,8 +47,6 @@ loop: /* Exception fixup for vector load is shared with normal exit */ 10: - /* Disable access to user memory */ - csrc CSR_STATUS, t6 mv a0, iNum ret @@ -49,4 +58,4 @@ loop: csrr t2, CSR_VSTART sub iNum, iNum, t2 j 10b -SYM_FUNC_END(__asm_vector_usercopy) +SYM_FUNC_END(__asm_vector_usercopy_sum_enabled) From 020667d661f9be65167174d28a6eda7102f7293f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Mon, 2 Jun 2025 21:39:15 +0200 Subject: [PATCH 60/69] riscv: process: use unsigned int instead of unsigned long for put_user() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The specification of prctl() for GET_UNALIGN_CTL states that the value is returned in an unsigned int * address passed as an unsigned long. Change the type to match that and avoid an unaligned access as well. Signed-off-by: Clément Léger Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250602193918.868962-3-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 7c244de77180..fe10e326f44e 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -57,7 +57,7 @@ int get_unalign_ctl(struct task_struct *tsk, unsigned long adr) if (!unaligned_ctl_available()) return -EINVAL; - return put_user(tsk->thread.align_ctl, (unsigned long __user *)adr); + return put_user(tsk->thread.align_ctl, (unsigned int __user *)adr); } void __show_regs(struct pt_regs *regs) From ca1a66cdd685030738cf077e3955fdedfe39fbb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Mon, 2 Jun 2025 21:39:16 +0200 Subject: [PATCH 61/69] riscv: uaccess: do not do misaligned accesses in get/put_user() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doing a misaligned access to userspace memory traps on platforms where misaligned accesses are emulated. Recent fixes removed the kernel's ability to do unaligned accesses to userspace memory safely, since interrupts are now kept disabled the whole time; doing so would thus crash the kernel. Such behavior was detected with GET_UNALIGN_CTL(), which did a put_user() with an unsigned long * address that should have been an unsigned int *. Reenabling kernel misaligned access emulation is a bit risky and would also degrade performance. Rather than doing that, avoid any misaligned access by using copy_from/to_user(), which never does misaligned accesses. This is only needed for !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS and thus only generates a bit more code for that config.
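To make the shape of this concrete, here is a minimal user-space sketch of the check-alignment-then-go-bytewise idea. It assumes a little-endian target and is an illustration only; the kernel code instead branches to the __asm_copy_{to,from}_user_sum_enabled() routines, as the hunk below shows.

    #include <stdint.h>

    /* Load a 64-bit value from a possibly misaligned pointer without
     * ever issuing a wide misaligned hardware access. */
    static uint64_t load_u64_any_alignment(const uint8_t *p)
    {
            /* Aligned fast path: one wide load, cannot trap. */
            if (((uintptr_t)p & (sizeof(uint64_t) - 1)) == 0)
                    return *(const uint64_t *)p;

            /* Misaligned slow path: assemble byte by byte (little endian). */
            uint64_t v = 0;
            for (unsigned int i = 0; i < sizeof(v); i++)
                    v |= (uint64_t)p[i] << (8 * i);
            return v;
    }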
Signed-off-by: Clément Léger Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250602193918.868962-4-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/uaccess.h | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 046de7ced09c..d472da4450e6 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -169,8 +169,19 @@ do { \ #endif /* CONFIG_64BIT */ +unsigned long __must_check __asm_copy_to_user_sum_enabled(void __user *to, + const void *from, unsigned long n); +unsigned long __must_check __asm_copy_from_user_sum_enabled(void *to, + const void __user *from, unsigned long n); + #define __get_user_nocheck(x, __gu_ptr, label) \ do { \ + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ + !IS_ALIGNED((uintptr_t)__gu_ptr, sizeof(*__gu_ptr))) { \ + if (__asm_copy_from_user_sum_enabled(&(x), __gu_ptr, sizeof(*__gu_ptr))) \ + goto label; \ + break; \ + } \ switch (sizeof(*__gu_ptr)) { \ case 1: \ __get_user_asm("lb", (x), __gu_ptr, label); \ @@ -297,6 +308,13 @@ do { \ #define __put_user_nocheck(x, __gu_ptr, label) \ do { \ + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ + !IS_ALIGNED((uintptr_t)__gu_ptr, sizeof(*__gu_ptr))) { \ + __inttype(x) val = (__inttype(x))x; \ + if (__asm_copy_to_user_sum_enabled(__gu_ptr, &(val), sizeof(*__gu_ptr))) \ + goto label; \ + break; \ + } \ switch (sizeof(*__gu_ptr)) { \ case 1: \ __put_user_asm("sb", (x), __gu_ptr, label); \ @@ -450,11 +468,6 @@ static inline void user_access_restore(unsigned long enabled) { } (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) -unsigned long __must_check __asm_copy_to_user_sum_enabled(void __user *to, - const void *from, unsigned long n); -unsigned long __must_check __asm_copy_from_user_sum_enabled(void *to, - const void __user *from, unsigned long n); - #define unsafe_copy_to_user(_dst, _src, _len, label) \ if (__asm_copy_to_user_sum_enabled(_dst, _src, _len)) \ goto label; From 9811c864f5d72effcaf476a80645f0ad3de4469d Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 5 May 2025 14:27:38 -0700 Subject: [PATCH 62/69] MAINTAINERS: Update Atish's email address My personal upstream email account was previously based on gmail, which has lately become difficult to use for managing upstream activities. Update it to the more reliable linux.dev account.
Signed-off-by: Atish Patra Link: https://lore.kernel.org/r/20250505-update_email_address-v1-1-1c24db506fdb@rivosinc.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- .mailmap | 3 ++- MAINTAINERS | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index 9afde79e1936..f7a81702309e 100644 --- a/.mailmap +++ b/.mailmap @@ -105,7 +105,8 @@ Arun Kumar Neelakantam Ashok Raj Nagarajan Ashwin Chaugule Asutosh Das -Atish Patra +Atish Patra +Atish Patra Avaneesh Kumar Dwivedi Axel Dyks Axel Lin diff --git a/MAINTAINERS b/MAINTAINERS index fa1e04e87d1d..050ffca19997 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13057,7 +13057,7 @@ F: arch/powerpc/kvm/ KERNEL VIRTUAL MACHINE FOR RISC-V (KVM/riscv) M: Anup Patel -R: Atish Patra +R: Atish Patra L: kvm@vger.kernel.org L: kvm-riscv@lists.infradead.org L: linux-riscv@lists.infradead.org @@ -20852,7 +20852,7 @@ F: arch/riscv/boot/dts/sifive/ F: arch/riscv/boot/dts/starfive/ RISC-V PMU DRIVERS -M: Atish Patra +M: Atish Patra R: Anup Patel L: linux-riscv@lists.infradead.org S: Supported From c39d53750ff96b282c869a0184a7c3ecfd298ca8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miquel=20Sabat=C3=A9=20Sol=C3=A0?= Date: Thu, 1 May 2025 15:03:09 +0200 Subject: [PATCH 63/69] riscv: Improve Kconfig help for RISCV_ISA_V_PREEMPTIVE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a couple of spelling mistakes and some minor grammatical issues. Signed-off-by: Miquel Sabaté Solà Link: https://lore.kernel.org/r/20250501130309.14803-1-mikisabate@gmail.com Reviewed-by: Alexandre Ghiti Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a93af30727ee..98a3ecdc65f6 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -669,12 +669,12 @@ config RISCV_ISA_V_PREEMPTIVE default y help Usually, in-kernel SIMD routines are run with preemption disabled. - Functions which envoke long running SIMD thus must yield core's + Functions which invoke long running SIMD thus must yield the core's vector unit to prevent blocking other tasks for too long. - This config allows kernel to run SIMD without explicitly disable - preemption. Enabling this config will result in higher memory - consumption due to the allocation of per-task's kernel Vector context. + This config allows the kernel to run SIMD without explicitly disabling + preemption. Enabling this config will result in higher memory consumption + due to the allocation of per-task's kernel Vector context. config RISCV_ISA_ZAWRS bool "Zawrs extension support for more efficient busy waiting" From 7bc76fb3883aaba56b16dc4009ba8a490b8dc6ab Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 3 Jun 2025 10:28:57 -0700 Subject: [PATCH 64/69] RISC-V: Documentation: Add enough title underlines to CMODX This reports as a warning in linux-next along the lines of Documentation/arch/riscv/cmodx.rst:14: WARNING: Title underline too short. CMODX in the Kernel Space --------------------- [docutils] Documentation/arch/riscv/cmodx.rst:43: WARNING: Title underline too short. CMODX in the User Space --------------------- [docutils] Documentation/arch/riscv/cmodx.rst:43: WARNING: Title underline too short.
CMODX in the User Space --------------------- [docutils] Link: https://lore.kernel.org/all/20250603154544.1602a8b5@canb.auug.org.au/ Fixes: 0e07200b2af6 ("riscv: Documentation: add a description about dynamic ftrace") Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20250603172856.49925-1-palmer@dabbelt.com Signed-off-by: Palmer Dabbelt --- Documentation/arch/riscv/cmodx.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst index e009873b2d17..40ba53bed5df 100644 --- a/Documentation/arch/riscv/cmodx.rst +++ b/Documentation/arch/riscv/cmodx.rst @@ -11,7 +11,7 @@ program must enforce its own synchronization with the unprivileged fence.i instruction. CMODX in the Kernel Space ---------------------- +------------------------- Dynamic ftrace --------------------- @@ -40,7 +40,7 @@ pacthable function. The metadata is resolved at the first trampoline, then the execution can be derect to another custom trampoline. CMODX in the User Space ---------------------- +----------------------- Though fence.i is an unprivileged instruction, the default Linux ABI prohibits the use of fence.i in userspace applications. At any point the scheduler may From a56972698810089d8f1bdc296cd709726db7176b Mon Sep 17 00:00:00 2001 From: Mayuresh Chitale Date: Tue, 2 Jul 2024 15:56:37 +0530 Subject: [PATCH 65/69] riscv: mm: Add support for Svinval extension The Svinval extension splits the SFENCE.VMA instruction into finer-grained invalidation and ordering operations, and is mandatory for the RVA23S64 profile. When Svinval is enabled, the local_flush_tlb_range_threshold_asid function should use the following sequence instead of a simple sfence.vma to optimize the TLB flushes: sfence.w.inval sinval.vma . . sinval.vma sfence.inval.ir The maximum number of consecutive sinval.vma instructions that can be executed in the local_flush_tlb_range_threshold_asid function is limited to 64. This is required to avoid soft lockups, and the approach is similar to that used on arm64.
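For reference, the resulting control flow has this shape. This is a sketch only, written with the helpers introduced in the hunk below, to make the ordering explicit; the loop stays bounded in practice because the function falls back to a full flush when the number of entries exceeds a threshold, as the surrounding context shows.

    static void flush_range_svinval_sketch(unsigned long start,
                                           unsigned long stride,
                                           unsigned long nr_ptes,
                                           unsigned long asid)
    {
            unsigned long i;

            local_sfence_w_inval();                 /* order earlier stores */
            for (i = 0; i < nr_ptes; i++) {
                    local_sinval_vma(start, asid);  /* invalidate one page */
                    start += stride;
            }
            local_sfence_inval_ir();                /* order later fetches */
    }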
Signed-off-by: Mayuresh Chitale Reviewed-by: Andrew Jones Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240702102637.9074-1-mchitale@ventanamicro.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/tlbflush.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index f9e27ba1df99..6289ed5c7eb4 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -7,6 +7,27 @@ #include #include #include +#include + +#define has_svinval() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL) + +static inline void local_sfence_inval_ir(void) +{ + asm volatile(SFENCE_INVAL_IR() ::: "memory"); +} + +static inline void local_sfence_w_inval(void) +{ + asm volatile(SFENCE_W_INVAL() ::: "memory"); +} + +static inline void local_sinval_vma(unsigned long vma, unsigned long asid) +{ + if (asid != FLUSH_TLB_NO_ASID) + asm volatile(SINVAL_VMA(%0, %1) : : "r" (vma), "r" (asid) : "memory"); + else + asm volatile(SINVAL_VMA(%0, zero) : : "r" (vma) : "memory"); +} /* * Flush entire TLB if number of entries to be flushed is greater @@ -27,6 +48,16 @@ static void local_flush_tlb_range_threshold_asid(unsigned long start, return; } + if (has_svinval()) { + local_sfence_w_inval(); + for (i = 0; i < nr_ptes_in_range; ++i) { + local_sinval_vma(start, asid); + start += stride; + } + local_sfence_inval_ir(); + return; + } + for (i = 0; i < nr_ptes_in_range; ++i) { local_flush_tlb_page_asid(start, asid); start += stride; From 6093faaf9593fca92f96f165c95ff4b53353b1f4 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Wed, 5 Mar 2025 16:37:06 +0800 Subject: [PATCH 66/69] raid6: Add RISC-V SIMD syndrome and recovery calculations The assembly is originally based on the ARM NEON and int.uc, but uses RISC-V vector instructions to implement the RAID6 syndrome and recovery calculations. The functions are tested on QEMU running with the option "-icount shift=0": raid6: rvvx1 gen() 1008 MB/s raid6: rvvx2 gen() 1395 MB/s raid6: rvvx4 gen() 1584 MB/s raid6: rvvx8 gen() 1694 MB/s raid6: int64x8 gen() 113 MB/s raid6: int64x4 gen() 116 MB/s raid6: int64x2 gen() 272 MB/s raid6: int64x1 gen() 229 MB/s raid6: using algorithm rvvx8 gen() 1694 MB/s raid6: .... 
xor() 1000 MB/s, rmw enabled raid6: using rvv recovery algorithm [Charlie: - Fixup vector options] Signed-off-by: Charlie Jenkins Signed-off-by: Chunyan Zhang Reviewed-by: Charlie Jenkins Tested-by: Charlie Jenkins Link: https://lore.kernel.org/r/20250305083707.74218-1-zhangchunyan@iscas.ac.cn Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- include/linux/raid/pq.h | 5 + lib/raid6/Makefile | 1 + lib/raid6/algos.c | 9 + lib/raid6/recov_rvv.c | 229 ++++++++ lib/raid6/rvv.c | 1212 +++++++++++++++++++++++++++++++++++++++ lib/raid6/rvv.h | 39 ++ 6 files changed, 1495 insertions(+) create mode 100644 lib/raid6/recov_rvv.c create mode 100644 lib/raid6/rvv.c create mode 100644 lib/raid6/rvv.h diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 98030accf641..72ff44cca864 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -108,6 +108,10 @@ extern const struct raid6_calls raid6_vpermxor4; extern const struct raid6_calls raid6_vpermxor8; extern const struct raid6_calls raid6_lsx; extern const struct raid6_calls raid6_lasx; +extern const struct raid6_calls raid6_rvvx1; +extern const struct raid6_calls raid6_rvvx2; +extern const struct raid6_calls raid6_rvvx4; +extern const struct raid6_calls raid6_rvvx8; struct raid6_recov_calls { void (*data2)(int, size_t, int, int, void **); @@ -125,6 +129,7 @@ extern const struct raid6_recov_calls raid6_recov_s390xc; extern const struct raid6_recov_calls raid6_recov_neon; extern const struct raid6_recov_calls raid6_recov_lsx; extern const struct raid6_recov_calls raid6_recov_lasx; +extern const struct raid6_recov_calls raid6_recov_rvv; extern const struct raid6_calls raid6_neonx1; extern const struct raid6_calls raid6_neonx2; diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 29127dd05d63..5be0a4e60ab1 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -10,6 +10,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \ raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o +raid6_pq-$(CONFIG_RISCV_ISA_V) += rvv.o recov_rvv.o hostprogs += mktables diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index cd2e88ee1f14..99980ff5b985 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -80,6 +80,12 @@ const struct raid6_calls * const raid6_algos[] = { #ifdef CONFIG_CPU_HAS_LSX &raid6_lsx, #endif +#endif +#ifdef CONFIG_RISCV_ISA_V + &raid6_rvvx1, + &raid6_rvvx2, + &raid6_rvvx4, + &raid6_rvvx8, #endif &raid6_intx8, &raid6_intx4, @@ -115,6 +121,9 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = { #ifdef CONFIG_CPU_HAS_LSX &raid6_recov_lsx, #endif +#endif +#ifdef CONFIG_RISCV_ISA_V + &raid6_recov_rvv, #endif &raid6_recov_intx1, NULL diff --git a/lib/raid6/recov_rvv.c b/lib/raid6/recov_rvv.c new file mode 100644 index 000000000000..f29303795ccf --- /dev/null +++ b/lib/raid6/recov_rvv.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Institute of Software, CAS. 
+ * Author: Chunyan Zhang + */ + +#include +#include +#include +#include + +static int rvv_has_vector(void) +{ + return has_vector(); +} + +static void __raid6_2data_recov_rvv(int bytes, u8 *p, u8 *q, u8 *dp, + u8 *dq, const u8 *pbmul, + const u8 *qmul) +{ + asm volatile (".option push\n" + ".option arch,+v\n" + "vsetvli x0, %[avl], e8, m1, ta, ma\n" + ".option pop\n" + : : + [avl]"r"(16) + ); + + /* + * while ( bytes-- ) { + * uint8_t px, qx, db; + * + * px = *p ^ *dp; + * qx = qmul[*q ^ *dq]; + * *dq++ = db = pbmul[px] ^ qx; + * *dp++ = db ^ px; + * p++; q++; + * } + */ + while (bytes) { + /* + * v0:px, v1:dp, + * v2:qx, v3:dq, + * v4:vx, v5:vy, + * v6:qm0, v7:qm1, + * v8:pm0, v9:pm1, + * v14:p/qm[vx], v15:p/qm[vy] + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vle8.v v0, (%[px])\n" + "vle8.v v1, (%[dp])\n" + "vxor.vv v0, v0, v1\n" + "vle8.v v2, (%[qx])\n" + "vle8.v v3, (%[dq])\n" + "vxor.vv v4, v2, v3\n" + "vsrl.vi v5, v4, 4\n" + "vand.vi v4, v4, 0xf\n" + "vle8.v v6, (%[qm0])\n" + "vle8.v v7, (%[qm1])\n" + "vrgather.vv v14, v6, v4\n" /* v14 = qm[vx] */ + "vrgather.vv v15, v7, v5\n" /* v15 = qm[vy] */ + "vxor.vv v2, v14, v15\n" /* v2 = qmul[*q ^ *dq] */ + + "vsrl.vi v5, v0, 4\n" + "vand.vi v4, v0, 0xf\n" + "vle8.v v8, (%[pm0])\n" + "vle8.v v9, (%[pm1])\n" + "vrgather.vv v14, v8, v4\n" /* v14 = pm[vx] */ + "vrgather.vv v15, v9, v5\n" /* v15 = pm[vy] */ + "vxor.vv v4, v14, v15\n" /* v4 = pbmul[px] */ + "vxor.vv v3, v4, v2\n" /* v3 = db = pbmul[px] ^ qx */ + "vxor.vv v1, v3, v0\n" /* v1 = db ^ px; */ + "vse8.v v3, (%[dq])\n" + "vse8.v v1, (%[dp])\n" + ".option pop\n" + : : + [px]"r"(p), + [dp]"r"(dp), + [qx]"r"(q), + [dq]"r"(dq), + [qm0]"r"(qmul), + [qm1]"r"(qmul + 16), + [pm0]"r"(pbmul), + [pm1]"r"(pbmul + 16) + :); + + bytes -= 16; + p += 16; + q += 16; + dp += 16; + dq += 16; + } +} + +static void __raid6_datap_recov_rvv(int bytes, u8 *p, u8 *q, + u8 *dq, const u8 *qmul) +{ + asm volatile (".option push\n" + ".option arch,+v\n" + "vsetvli x0, %[avl], e8, m1, ta, ma\n" + ".option pop\n" + : : + [avl]"r"(16) + ); + + /* + * while (bytes--) { + * *p++ ^= *dq = qmul[*q ^ *dq]; + * q++; dq++; + * } + */ + while (bytes) { + /* + * v0:vx, v1:vy, + * v2:dq, v3:p, + * v4:qm0, v5:qm1, + * v10:m[vx], v11:m[vy] + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vle8.v v0, (%[vx])\n" + "vle8.v v2, (%[dq])\n" + "vxor.vv v0, v0, v2\n" + "vsrl.vi v1, v0, 4\n" + "vand.vi v0, v0, 0xf\n" + "vle8.v v4, (%[qm0])\n" + "vle8.v v5, (%[qm1])\n" + "vrgather.vv v10, v4, v0\n" + "vrgather.vv v11, v5, v1\n" + "vxor.vv v0, v10, v11\n" + "vle8.v v1, (%[vy])\n" + "vxor.vv v1, v0, v1\n" + "vse8.v v0, (%[dq])\n" + "vse8.v v1, (%[vy])\n" + ".option pop\n" + : : + [vx]"r"(q), + [vy]"r"(p), + [dq]"r"(dq), + [qm0]"r"(qmul), + [qm1]"r"(qmul + 16) + :); + + bytes -= 16; + p += 16; + q += 16; + dq += 16; + } +} + +static void raid6_2data_recov_rvv(int disks, size_t bytes, int faila, + int failb, void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore 
pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks - 2] = p; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ + raid6_gfexp[failb]]]; + + kernel_vector_begin(); + __raid6_2data_recov_rvv(bytes, p, q, dp, dq, pbmul, qmul); + kernel_vector_end(); +} + +static void raid6_datap_recov_rvv(int disks, size_t bytes, int faila, + void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_vector_begin(); + __raid6_datap_recov_rvv(bytes, p, q, dq, qmul); + kernel_vector_end(); +} + +const struct raid6_recov_calls raid6_recov_rvv = { + .data2 = raid6_2data_recov_rvv, + .datap = raid6_datap_recov_rvv, + .valid = rvv_has_vector, + .name = "rvv", + .priority = 1, +}; diff --git a/lib/raid6/rvv.c b/lib/raid6/rvv.c new file mode 100644 index 000000000000..f0887344b274 --- /dev/null +++ b/lib/raid6/rvv.c @@ -0,0 +1,1212 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RAID-6 syndrome calculation using RISC-V vector instructions + * + * Copyright 2024 Institute of Software, CAS. + * Author: Chunyan Zhang + * + * Based on neon.uc: + * Copyright 2002-2004 H. Peter Anvin + */ + +#include +#include +#include +#include +#include +#include "rvv.h" + +#define NSIZE (riscv_v_vsize / 32) /* NSIZE = vlenb */ + +static int rvv_has_vector(void) +{ + return has_vector(); +} + +static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + unsigned long d; + int z, z0; + u8 *p, *q; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0 + 1]; /* XOR parity */ + q = dptr[z0 + 2]; /* RS syndrome */ + + asm volatile (".option push\n" + ".option arch,+v\n" + "vsetvli t0, x0, e8, m1, ta, ma\n" + ".option pop\n" + ); + + /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ + for (d = 0; d < bytes; d += NSIZE * 1) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vle8.v v0, (%[wp0])\n" + "vle8.v v1, (%[wp0])\n" + ".option pop\n" + : : + [wp0]"r"(&dptr[z0][d + 0 * NSIZE]) + ); + + for (z = z0 - 1 ; z >= 0 ; z--) { + /* + * w2$$ = MASK(wq$$); + * w1$$ = SHLBYTE(wq$$); + * w2$$ &= NBYTES(0x1d); + * w1$$ ^= w2$$; + * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + * wq$$ = w1$$ ^ wd$$; + * wp$$ ^= wd$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vsra.vi v2, v1, 7\n" + "vsll.vi v3, v1, 1\n" + "vand.vx v2, v2, %[x1d]\n" + "vxor.vv v3, v3, v2\n" + "vle8.v v2, (%[wd0])\n" + "vxor.vv v1, v3, v2\n" + "vxor.vv v0, v0, v2\n" + ".option pop\n" + : : + [wd0]"r"(&dptr[z][d + 0 * NSIZE]), + [x1d]"r"(0x1d) + ); + } + + /* + * *(unative_t *)&p[d+NSIZE*$$] = wp$$; + * *(unative_t *)&q[d+NSIZE*$$] = wq$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vse8.v v0, (%[wp0])\n" + "vse8.v v1, (%[wq0])\n" + ".option pop\n" + : : + [wp0]"r"(&p[d + NSIZE * 0]), + [wq0]"r"(&q[d + NSIZE * 0]) + ); + } +} + +static void 
raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, + unsigned long bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + unsigned long d; + int z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks - 2]; /* XOR parity */ + q = dptr[disks - 1]; /* RS syndrome */ + + asm volatile (".option push\n" + ".option arch,+v\n" + "vsetvli t0, x0, e8, m1, ta, ma\n" + ".option pop\n" + ); + + /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ + for (d = 0 ; d < bytes ; d += NSIZE * 1) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vle8.v v0, (%[wp0])\n" + "vle8.v v1, (%[wp0])\n" + ".option pop\n" + : : + [wp0]"r"(&dptr[z0][d + 0 * NSIZE]) + ); + + /* P/Q data pages */ + for (z = z0 - 1; z >= start; z--) { + /* + * w2$$ = MASK(wq$$); + * w1$$ = SHLBYTE(wq$$); + * w2$$ &= NBYTES(0x1d); + * w1$$ ^= w2$$; + * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + * wq$$ = w1$$ ^ wd$$; + * wp$$ ^= wd$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vsra.vi v2, v1, 7\n" + "vsll.vi v3, v1, 1\n" + "vand.vx v2, v2, %[x1d]\n" + "vxor.vv v3, v3, v2\n" + "vle8.v v2, (%[wd0])\n" + "vxor.vv v1, v3, v2\n" + "vxor.vv v0, v0, v2\n" + ".option pop\n" + : : + [wd0]"r"(&dptr[z][d + 0 * NSIZE]), + [x1d]"r"(0x1d) + ); + } + + /* P/Q left side optimization */ + for (z = start - 1; z >= 0; z--) { + /* + * w2$$ = MASK(wq$$); + * w1$$ = SHLBYTE(wq$$); + * w2$$ &= NBYTES(0x1d); + * wq$$ = w1$$ ^ w2$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vsra.vi v2, v1, 7\n" + "vsll.vi v3, v1, 1\n" + "vand.vx v2, v2, %[x1d]\n" + "vxor.vv v1, v3, v2\n" + ".option pop\n" + : : + [x1d]"r"(0x1d) + ); + } + + /* + * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + * v0:wp0, v1:wq0, v2:p0, v3:q0 + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vle8.v v2, (%[wp0])\n" + "vle8.v v3, (%[wq0])\n" + "vxor.vv v2, v2, v0\n" + "vxor.vv v3, v3, v1\n" + "vse8.v v2, (%[wp0])\n" + "vse8.v v3, (%[wq0])\n" + ".option pop\n" + : : + [wp0]"r"(&p[d + NSIZE * 0]), + [wq0]"r"(&q[d + NSIZE * 0]) + ); + } +} + +static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + unsigned long d; + int z, z0; + u8 *p, *q; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0 + 1]; /* XOR parity */ + q = dptr[z0 + 2]; /* RS syndrome */ + + asm volatile (".option push\n" + ".option arch,+v\n" + "vsetvli t0, x0, e8, m1, ta, ma\n" + ".option pop\n" + ); + + /* + * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 + * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 + */ + for (d = 0; d < bytes; d += NSIZE * 2) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vle8.v v0, (%[wp0])\n" + "vle8.v v1, (%[wp0])\n" + "vle8.v v4, (%[wp1])\n" + "vle8.v v5, (%[wp1])\n" + ".option pop\n" + : : + [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), + [wp1]"r"(&dptr[z0][d + 1 * NSIZE]) + ); + + for (z = z0 - 1; z >= 0; z--) { + /* + * w2$$ = MASK(wq$$); + * w1$$ = SHLBYTE(wq$$); + * w2$$ &= NBYTES(0x1d); + * w1$$ ^= w2$$; + * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + * wq$$ = w1$$ ^ wd$$; + * wp$$ ^= wd$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vsra.vi v2, v1, 7\n" + "vsll.vi v3, v1, 1\n" + "vand.vx v2, v2, %[x1d]\n" + "vxor.vv v3, v3, v2\n" + "vle8.v v2, (%[wd0])\n" + "vxor.vv v1, v3, v2\n" + "vxor.vv v0, v0, v2\n" + + "vsra.vi v6, v5, 7\n" + "vsll.vi v7, v5, 1\n" + "vand.vx v6, v6, %[x1d]\n" + "vxor.vv v7, v7, 
v6\n" + "vle8.v v6, (%[wd1])\n" + "vxor.vv v5, v7, v6\n" + "vxor.vv v4, v4, v6\n" + ".option pop\n" + : : + [wd0]"r"(&dptr[z][d + 0 * NSIZE]), + [wd1]"r"(&dptr[z][d + 1 * NSIZE]), + [x1d]"r"(0x1d) + ); + } + + /* + * *(unative_t *)&p[d+NSIZE*$$] = wp$$; + * *(unative_t *)&q[d+NSIZE*$$] = wq$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vse8.v v0, (%[wp0])\n" + "vse8.v v1, (%[wq0])\n" + "vse8.v v4, (%[wp1])\n" + "vse8.v v5, (%[wq1])\n" + ".option pop\n" + : : + [wp0]"r"(&p[d + NSIZE * 0]), + [wq0]"r"(&q[d + NSIZE * 0]), + [wp1]"r"(&p[d + NSIZE * 1]), + [wq1]"r"(&q[d + NSIZE * 1]) + ); + } +} + +static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, + unsigned long bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + unsigned long d; + int z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks - 2]; /* XOR parity */ + q = dptr[disks - 1]; /* RS syndrome */ + + asm volatile (".option push\n" + ".option arch,+v\n" + "vsetvli t0, x0, e8, m1, ta, ma\n" + ".option pop\n" + ); + + /* + * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 + * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 + */ + for (d = 0; d < bytes; d += NSIZE * 2) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vle8.v v0, (%[wp0])\n" + "vle8.v v1, (%[wp0])\n" + "vle8.v v4, (%[wp1])\n" + "vle8.v v5, (%[wp1])\n" + ".option pop\n" + : : + [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), + [wp1]"r"(&dptr[z0][d + 1 * NSIZE]) + ); + + /* P/Q data pages */ + for (z = z0 - 1; z >= start; z--) { + /* + * w2$$ = MASK(wq$$); + * w1$$ = SHLBYTE(wq$$); + * w2$$ &= NBYTES(0x1d); + * w1$$ ^= w2$$; + * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + * wq$$ = w1$$ ^ wd$$; + * wp$$ ^= wd$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vsra.vi v2, v1, 7\n" + "vsll.vi v3, v1, 1\n" + "vand.vx v2, v2, %[x1d]\n" + "vxor.vv v3, v3, v2\n" + "vle8.v v2, (%[wd0])\n" + "vxor.vv v1, v3, v2\n" + "vxor.vv v0, v0, v2\n" + + "vsra.vi v6, v5, 7\n" + "vsll.vi v7, v5, 1\n" + "vand.vx v6, v6, %[x1d]\n" + "vxor.vv v7, v7, v6\n" + "vle8.v v6, (%[wd1])\n" + "vxor.vv v5, v7, v6\n" + "vxor.vv v4, v4, v6\n" + ".option pop\n" + : : + [wd0]"r"(&dptr[z][d + 0 * NSIZE]), + [wd1]"r"(&dptr[z][d + 1 * NSIZE]), + [x1d]"r"(0x1d) + ); + } + + /* P/Q left side optimization */ + for (z = start - 1; z >= 0; z--) { + /* + * w2$$ = MASK(wq$$); + * w1$$ = SHLBYTE(wq$$); + * w2$$ &= NBYTES(0x1d); + * wq$$ = w1$$ ^ w2$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vsra.vi v2, v1, 7\n" + "vsll.vi v3, v1, 1\n" + "vand.vx v2, v2, %[x1d]\n" + "vxor.vv v1, v3, v2\n" + + "vsra.vi v6, v5, 7\n" + "vsll.vi v7, v5, 1\n" + "vand.vx v6, v6, %[x1d]\n" + "vxor.vv v5, v7, v6\n" + ".option pop\n" + : : + [x1d]"r"(0x1d) + ); + } + + /* + * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + * v0:wp0, v1:wq0, v2:p0, v3:q0 + * v4:wp1, v5:wq1, v6:p1, v7:q1 + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vle8.v v2, (%[wp0])\n" + "vle8.v v3, (%[wq0])\n" + "vxor.vv v2, v2, v0\n" + "vxor.vv v3, v3, v1\n" + "vse8.v v2, (%[wp0])\n" + "vse8.v v3, (%[wq0])\n" + + "vle8.v v6, (%[wp1])\n" + "vle8.v v7, (%[wq1])\n" + "vxor.vv v6, v6, v4\n" + "vxor.vv v7, v7, v5\n" + "vse8.v v6, (%[wp1])\n" + "vse8.v v7, (%[wq1])\n" + ".option pop\n" + : : + [wp0]"r"(&p[d + NSIZE * 0]), + [wq0]"r"(&q[d + NSIZE * 0]), + [wp1]"r"(&p[d + NSIZE * 1]), + [wq1]"r"(&q[d + NSIZE * 1]) + ); + } +} + +static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, 
void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + unsigned long d; + int z, z0; + u8 *p, *q; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0 + 1]; /* XOR parity */ + q = dptr[z0 + 2]; /* RS syndrome */ + + asm volatile (".option push\n" + ".option arch,+v\n" + "vsetvli t0, x0, e8, m1, ta, ma\n" + ".option pop\n" + ); + + /* + * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 + * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 + * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 + * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 + */ + for (d = 0; d < bytes; d += NSIZE * 4) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vle8.v v0, (%[wp0])\n" + "vle8.v v1, (%[wp0])\n" + "vle8.v v4, (%[wp1])\n" + "vle8.v v5, (%[wp1])\n" + "vle8.v v8, (%[wp2])\n" + "vle8.v v9, (%[wp2])\n" + "vle8.v v12, (%[wp3])\n" + "vle8.v v13, (%[wp3])\n" + ".option pop\n" + : : + [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), + [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), + [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), + [wp3]"r"(&dptr[z0][d + 3 * NSIZE]) + ); + + for (z = z0 - 1; z >= 0; z--) { + /* + * w2$$ = MASK(wq$$); + * w1$$ = SHLBYTE(wq$$); + * w2$$ &= NBYTES(0x1d); + * w1$$ ^= w2$$; + * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + * wq$$ = w1$$ ^ wd$$; + * wp$$ ^= wd$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vsra.vi v2, v1, 7\n" + "vsll.vi v3, v1, 1\n" + "vand.vx v2, v2, %[x1d]\n" + "vxor.vv v3, v3, v2\n" + "vle8.v v2, (%[wd0])\n" + "vxor.vv v1, v3, v2\n" + "vxor.vv v0, v0, v2\n" + + "vsra.vi v6, v5, 7\n" + "vsll.vi v7, v5, 1\n" + "vand.vx v6, v6, %[x1d]\n" + "vxor.vv v7, v7, v6\n" + "vle8.v v6, (%[wd1])\n" + "vxor.vv v5, v7, v6\n" + "vxor.vv v4, v4, v6\n" + + "vsra.vi v10, v9, 7\n" + "vsll.vi v11, v9, 1\n" + "vand.vx v10, v10, %[x1d]\n" + "vxor.vv v11, v11, v10\n" + "vle8.v v10, (%[wd2])\n" + "vxor.vv v9, v11, v10\n" + "vxor.vv v8, v8, v10\n" + + "vsra.vi v14, v13, 7\n" + "vsll.vi v15, v13, 1\n" + "vand.vx v14, v14, %[x1d]\n" + "vxor.vv v15, v15, v14\n" + "vle8.v v14, (%[wd3])\n" + "vxor.vv v13, v15, v14\n" + "vxor.vv v12, v12, v14\n" + ".option pop\n" + : : + [wd0]"r"(&dptr[z][d + 0 * NSIZE]), + [wd1]"r"(&dptr[z][d + 1 * NSIZE]), + [wd2]"r"(&dptr[z][d + 2 * NSIZE]), + [wd3]"r"(&dptr[z][d + 3 * NSIZE]), + [x1d]"r"(0x1d) + ); + } + + /* + * *(unative_t *)&p[d+NSIZE*$$] = wp$$; + * *(unative_t *)&q[d+NSIZE*$$] = wq$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vse8.v v0, (%[wp0])\n" + "vse8.v v1, (%[wq0])\n" + "vse8.v v4, (%[wp1])\n" + "vse8.v v5, (%[wq1])\n" + "vse8.v v8, (%[wp2])\n" + "vse8.v v9, (%[wq2])\n" + "vse8.v v12, (%[wp3])\n" + "vse8.v v13, (%[wq3])\n" + ".option pop\n" + : : + [wp0]"r"(&p[d + NSIZE * 0]), + [wq0]"r"(&q[d + NSIZE * 0]), + [wp1]"r"(&p[d + NSIZE * 1]), + [wq1]"r"(&q[d + NSIZE * 1]), + [wp2]"r"(&p[d + NSIZE * 2]), + [wq2]"r"(&q[d + NSIZE * 2]), + [wp3]"r"(&p[d + NSIZE * 3]), + [wq3]"r"(&q[d + NSIZE * 3]) + ); + } +} + +static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, + unsigned long bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + unsigned long d; + int z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks - 2]; /* XOR parity */ + q = dptr[disks - 1]; /* RS syndrome */ + + asm volatile (".option push\n" + ".option arch,+v\n" + "vsetvli t0, x0, e8, m1, ta, ma\n" + ".option pop\n" + ); + + /* + * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 + * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 + * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 + * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 + */ + for (d = 0; d < bytes; d += 
NSIZE * 4) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vle8.v v0, (%[wp0])\n" + "vle8.v v1, (%[wp0])\n" + "vle8.v v4, (%[wp1])\n" + "vle8.v v5, (%[wp1])\n" + "vle8.v v8, (%[wp2])\n" + "vle8.v v9, (%[wp2])\n" + "vle8.v v12, (%[wp3])\n" + "vle8.v v13, (%[wp3])\n" + ".option pop\n" + : : + [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), + [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), + [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), + [wp3]"r"(&dptr[z0][d + 3 * NSIZE]) + ); + + /* P/Q data pages */ + for (z = z0 - 1; z >= start; z--) { + /* + * w2$$ = MASK(wq$$); + * w1$$ = SHLBYTE(wq$$); + * w2$$ &= NBYTES(0x1d); + * w1$$ ^= w2$$; + * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + * wq$$ = w1$$ ^ wd$$; + * wp$$ ^= wd$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vsra.vi v2, v1, 7\n" + "vsll.vi v3, v1, 1\n" + "vand.vx v2, v2, %[x1d]\n" + "vxor.vv v3, v3, v2\n" + "vle8.v v2, (%[wd0])\n" + "vxor.vv v1, v3, v2\n" + "vxor.vv v0, v0, v2\n" + + "vsra.vi v6, v5, 7\n" + "vsll.vi v7, v5, 1\n" + "vand.vx v6, v6, %[x1d]\n" + "vxor.vv v7, v7, v6\n" + "vle8.v v6, (%[wd1])\n" + "vxor.vv v5, v7, v6\n" + "vxor.vv v4, v4, v6\n" + + "vsra.vi v10, v9, 7\n" + "vsll.vi v11, v9, 1\n" + "vand.vx v10, v10, %[x1d]\n" + "vxor.vv v11, v11, v10\n" + "vle8.v v10, (%[wd2])\n" + "vxor.vv v9, v11, v10\n" + "vxor.vv v8, v8, v10\n" + + "vsra.vi v14, v13, 7\n" + "vsll.vi v15, v13, 1\n" + "vand.vx v14, v14, %[x1d]\n" + "vxor.vv v15, v15, v14\n" + "vle8.v v14, (%[wd3])\n" + "vxor.vv v13, v15, v14\n" + "vxor.vv v12, v12, v14\n" + ".option pop\n" + : : + [wd0]"r"(&dptr[z][d + 0 * NSIZE]), + [wd1]"r"(&dptr[z][d + 1 * NSIZE]), + [wd2]"r"(&dptr[z][d + 2 * NSIZE]), + [wd3]"r"(&dptr[z][d + 3 * NSIZE]), + [x1d]"r"(0x1d) + ); + } + + /* P/Q left side optimization */ + for (z = start - 1; z >= 0; z--) { + /* + * w2$$ = MASK(wq$$); + * w1$$ = SHLBYTE(wq$$); + * w2$$ &= NBYTES(0x1d); + * wq$$ = w1$$ ^ w2$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vsra.vi v2, v1, 7\n" + "vsll.vi v3, v1, 1\n" + "vand.vx v2, v2, %[x1d]\n" + "vxor.vv v1, v3, v2\n" + + "vsra.vi v6, v5, 7\n" + "vsll.vi v7, v5, 1\n" + "vand.vx v6, v6, %[x1d]\n" + "vxor.vv v5, v7, v6\n" + + "vsra.vi v10, v9, 7\n" + "vsll.vi v11, v9, 1\n" + "vand.vx v10, v10, %[x1d]\n" + "vxor.vv v9, v11, v10\n" + + "vsra.vi v14, v13, 7\n" + "vsll.vi v15, v13, 1\n" + "vand.vx v14, v14, %[x1d]\n" + "vxor.vv v13, v15, v14\n" + ".option pop\n" + : : + [x1d]"r"(0x1d) + ); + } + + /* + * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + * v0:wp0, v1:wq0, v2:p0, v3:q0 + * v4:wp1, v5:wq1, v6:p1, v7:q1 + * v8:wp2, v9:wq2, v10:p2, v11:q2 + * v12:wp3, v13:wq3, v14:p3, v15:q3 + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vle8.v v2, (%[wp0])\n" + "vle8.v v3, (%[wq0])\n" + "vxor.vv v2, v2, v0\n" + "vxor.vv v3, v3, v1\n" + "vse8.v v2, (%[wp0])\n" + "vse8.v v3, (%[wq0])\n" + + "vle8.v v6, (%[wp1])\n" + "vle8.v v7, (%[wq1])\n" + "vxor.vv v6, v6, v4\n" + "vxor.vv v7, v7, v5\n" + "vse8.v v6, (%[wp1])\n" + "vse8.v v7, (%[wq1])\n" + + "vle8.v v10, (%[wp2])\n" + "vle8.v v11, (%[wq2])\n" + "vxor.vv v10, v10, v8\n" + "vxor.vv v11, v11, v9\n" + "vse8.v v10, (%[wp2])\n" + "vse8.v v11, (%[wq2])\n" + + "vle8.v v14, (%[wp3])\n" + "vle8.v v15, (%[wq3])\n" + "vxor.vv v14, v14, v12\n" + "vxor.vv v15, v15, v13\n" + "vse8.v v14, (%[wp3])\n" + "vse8.v v15, (%[wq3])\n" + ".option pop\n" + : : + [wp0]"r"(&p[d + NSIZE * 0]), + [wq0]"r"(&q[d + NSIZE * 0]), + [wp1]"r"(&p[d + NSIZE * 1]), + [wq1]"r"(&q[d + 
NSIZE * 1]), + [wp2]"r"(&p[d + NSIZE * 2]), + [wq2]"r"(&q[d + NSIZE * 2]), + [wp3]"r"(&p[d + NSIZE * 3]), + [wq3]"r"(&q[d + NSIZE * 3]) + ); + } +} + +static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + unsigned long d; + int z, z0; + u8 *p, *q; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0 + 1]; /* XOR parity */ + q = dptr[z0 + 2]; /* RS syndrome */ + + asm volatile (".option push\n" + ".option arch,+v\n" + "vsetvli t0, x0, e8, m1, ta, ma\n" + ".option pop\n" + ); + + /* + * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 + * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 + * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 + * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 + * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14 + * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15 + * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16 + * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17 + */ + for (d = 0; d < bytes; d += NSIZE * 8) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vle8.v v0, (%[wp0])\n" + "vle8.v v1, (%[wp0])\n" + "vle8.v v4, (%[wp1])\n" + "vle8.v v5, (%[wp1])\n" + "vle8.v v8, (%[wp2])\n" + "vle8.v v9, (%[wp2])\n" + "vle8.v v12, (%[wp3])\n" + "vle8.v v13, (%[wp3])\n" + "vle8.v v16, (%[wp4])\n" + "vle8.v v17, (%[wp4])\n" + "vle8.v v20, (%[wp5])\n" + "vle8.v v21, (%[wp5])\n" + "vle8.v v24, (%[wp6])\n" + "vle8.v v25, (%[wp6])\n" + "vle8.v v28, (%[wp7])\n" + "vle8.v v29, (%[wp7])\n" + ".option pop\n" + : : + [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), + [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), + [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), + [wp3]"r"(&dptr[z0][d + 3 * NSIZE]), + [wp4]"r"(&dptr[z0][d + 4 * NSIZE]), + [wp5]"r"(&dptr[z0][d + 5 * NSIZE]), + [wp6]"r"(&dptr[z0][d + 6 * NSIZE]), + [wp7]"r"(&dptr[z0][d + 7 * NSIZE]) + ); + + for (z = z0 - 1; z >= 0; z--) { + /* + * w2$$ = MASK(wq$$); + * w1$$ = SHLBYTE(wq$$); + * w2$$ &= NBYTES(0x1d); + * w1$$ ^= w2$$; + * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + * wq$$ = w1$$ ^ wd$$; + * wp$$ ^= wd$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vsra.vi v2, v1, 7\n" + "vsll.vi v3, v1, 1\n" + "vand.vx v2, v2, %[x1d]\n" + "vxor.vv v3, v3, v2\n" + "vle8.v v2, (%[wd0])\n" + "vxor.vv v1, v3, v2\n" + "vxor.vv v0, v0, v2\n" + + "vsra.vi v6, v5, 7\n" + "vsll.vi v7, v5, 1\n" + "vand.vx v6, v6, %[x1d]\n" + "vxor.vv v7, v7, v6\n" + "vle8.v v6, (%[wd1])\n" + "vxor.vv v5, v7, v6\n" + "vxor.vv v4, v4, v6\n" + + "vsra.vi v10, v9, 7\n" + "vsll.vi v11, v9, 1\n" + "vand.vx v10, v10, %[x1d]\n" + "vxor.vv v11, v11, v10\n" + "vle8.v v10, (%[wd2])\n" + "vxor.vv v9, v11, v10\n" + "vxor.vv v8, v8, v10\n" + + "vsra.vi v14, v13, 7\n" + "vsll.vi v15, v13, 1\n" + "vand.vx v14, v14, %[x1d]\n" + "vxor.vv v15, v15, v14\n" + "vle8.v v14, (%[wd3])\n" + "vxor.vv v13, v15, v14\n" + "vxor.vv v12, v12, v14\n" + + "vsra.vi v18, v17, 7\n" + "vsll.vi v19, v17, 1\n" + "vand.vx v18, v18, %[x1d]\n" + "vxor.vv v19, v19, v18\n" + "vle8.v v18, (%[wd4])\n" + "vxor.vv v17, v19, v18\n" + "vxor.vv v16, v16, v18\n" + + "vsra.vi v22, v21, 7\n" + "vsll.vi v23, v21, 1\n" + "vand.vx v22, v22, %[x1d]\n" + "vxor.vv v23, v23, v22\n" + "vle8.v v22, (%[wd5])\n" + "vxor.vv v21, v23, v22\n" + "vxor.vv v20, v20, v22\n" + + "vsra.vi v26, v25, 7\n" + "vsll.vi v27, v25, 1\n" + "vand.vx v26, v26, %[x1d]\n" + "vxor.vv v27, v27, v26\n" + "vle8.v v26, (%[wd6])\n" + "vxor.vv v25, v27, v26\n" + "vxor.vv v24, v24, v26\n" + + "vsra.vi v30, v29, 7\n" + "vsll.vi v31, v29, 1\n" + "vand.vx v30, v30, %[x1d]\n" + "vxor.vv v31, v31, v30\n" + "vle8.v v30, 
(%[wd7])\n" + "vxor.vv v29, v31, v30\n" + "vxor.vv v28, v28, v30\n" + ".option pop\n" + : : + [wd0]"r"(&dptr[z][d + 0 * NSIZE]), + [wd1]"r"(&dptr[z][d + 1 * NSIZE]), + [wd2]"r"(&dptr[z][d + 2 * NSIZE]), + [wd3]"r"(&dptr[z][d + 3 * NSIZE]), + [wd4]"r"(&dptr[z][d + 4 * NSIZE]), + [wd5]"r"(&dptr[z][d + 5 * NSIZE]), + [wd6]"r"(&dptr[z][d + 6 * NSIZE]), + [wd7]"r"(&dptr[z][d + 7 * NSIZE]), + [x1d]"r"(0x1d) + ); + } + + /* + * *(unative_t *)&p[d+NSIZE*$$] = wp$$; + * *(unative_t *)&q[d+NSIZE*$$] = wq$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vse8.v v0, (%[wp0])\n" + "vse8.v v1, (%[wq0])\n" + "vse8.v v4, (%[wp1])\n" + "vse8.v v5, (%[wq1])\n" + "vse8.v v8, (%[wp2])\n" + "vse8.v v9, (%[wq2])\n" + "vse8.v v12, (%[wp3])\n" + "vse8.v v13, (%[wq3])\n" + "vse8.v v16, (%[wp4])\n" + "vse8.v v17, (%[wq4])\n" + "vse8.v v20, (%[wp5])\n" + "vse8.v v21, (%[wq5])\n" + "vse8.v v24, (%[wp6])\n" + "vse8.v v25, (%[wq6])\n" + "vse8.v v28, (%[wp7])\n" + "vse8.v v29, (%[wq7])\n" + ".option pop\n" + : : + [wp0]"r"(&p[d + NSIZE * 0]), + [wq0]"r"(&q[d + NSIZE * 0]), + [wp1]"r"(&p[d + NSIZE * 1]), + [wq1]"r"(&q[d + NSIZE * 1]), + [wp2]"r"(&p[d + NSIZE * 2]), + [wq2]"r"(&q[d + NSIZE * 2]), + [wp3]"r"(&p[d + NSIZE * 3]), + [wq3]"r"(&q[d + NSIZE * 3]), + [wp4]"r"(&p[d + NSIZE * 4]), + [wq4]"r"(&q[d + NSIZE * 4]), + [wp5]"r"(&p[d + NSIZE * 5]), + [wq5]"r"(&q[d + NSIZE * 5]), + [wp6]"r"(&p[d + NSIZE * 6]), + [wq6]"r"(&q[d + NSIZE * 6]), + [wp7]"r"(&p[d + NSIZE * 7]), + [wq7]"r"(&q[d + NSIZE * 7]) + ); + } +} + +static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, + unsigned long bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + unsigned long d; + int z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks - 2]; /* XOR parity */ + q = dptr[disks - 1]; /* RS syndrome */ + + asm volatile (".option push\n" + ".option arch,+v\n" + "vsetvli t0, x0, e8, m1, ta, ma\n" + ".option pop\n" + ); + + /* + * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 + * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 + * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 + * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 + * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14 + * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15 + * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16 + * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17 + */ + for (d = 0; d < bytes; d += NSIZE * 8) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vle8.v v0, (%[wp0])\n" + "vle8.v v1, (%[wp0])\n" + "vle8.v v4, (%[wp1])\n" + "vle8.v v5, (%[wp1])\n" + "vle8.v v8, (%[wp2])\n" + "vle8.v v9, (%[wp2])\n" + "vle8.v v12, (%[wp3])\n" + "vle8.v v13, (%[wp3])\n" + "vle8.v v16, (%[wp4])\n" + "vle8.v v17, (%[wp4])\n" + "vle8.v v20, (%[wp5])\n" + "vle8.v v21, (%[wp5])\n" + "vle8.v v24, (%[wp6])\n" + "vle8.v v25, (%[wp6])\n" + "vle8.v v28, (%[wp7])\n" + "vle8.v v29, (%[wp7])\n" + ".option pop\n" + : : + [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), + [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), + [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), + [wp3]"r"(&dptr[z0][d + 3 * NSIZE]), + [wp4]"r"(&dptr[z0][d + 4 * NSIZE]), + [wp5]"r"(&dptr[z0][d + 5 * NSIZE]), + [wp6]"r"(&dptr[z0][d + 6 * NSIZE]), + [wp7]"r"(&dptr[z0][d + 7 * NSIZE]) + ); + + /* P/Q data pages */ + for (z = z0 - 1; z >= start; z--) { + /* + * w2$$ = MASK(wq$$); + * w1$$ = SHLBYTE(wq$$); + * w2$$ &= NBYTES(0x1d); + * w1$$ ^= w2$$; + * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + * wq$$ = w1$$ ^ wd$$; + * wp$$ ^= wd$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vsra.vi v2, v1, 7\n" + 
"vsll.vi v3, v1, 1\n" + "vand.vx v2, v2, %[x1d]\n" + "vxor.vv v3, v3, v2\n" + "vle8.v v2, (%[wd0])\n" + "vxor.vv v1, v3, v2\n" + "vxor.vv v0, v0, v2\n" + + "vsra.vi v6, v5, 7\n" + "vsll.vi v7, v5, 1\n" + "vand.vx v6, v6, %[x1d]\n" + "vxor.vv v7, v7, v6\n" + "vle8.v v6, (%[wd1])\n" + "vxor.vv v5, v7, v6\n" + "vxor.vv v4, v4, v6\n" + + "vsra.vi v10, v9, 7\n" + "vsll.vi v11, v9, 1\n" + "vand.vx v10, v10, %[x1d]\n" + "vxor.vv v11, v11, v10\n" + "vle8.v v10, (%[wd2])\n" + "vxor.vv v9, v11, v10\n" + "vxor.vv v8, v8, v10\n" + + "vsra.vi v14, v13, 7\n" + "vsll.vi v15, v13, 1\n" + "vand.vx v14, v14, %[x1d]\n" + "vxor.vv v15, v15, v14\n" + "vle8.v v14, (%[wd3])\n" + "vxor.vv v13, v15, v14\n" + "vxor.vv v12, v12, v14\n" + + "vsra.vi v18, v17, 7\n" + "vsll.vi v19, v17, 1\n" + "vand.vx v18, v18, %[x1d]\n" + "vxor.vv v19, v19, v18\n" + "vle8.v v18, (%[wd4])\n" + "vxor.vv v17, v19, v18\n" + "vxor.vv v16, v16, v18\n" + + "vsra.vi v22, v21, 7\n" + "vsll.vi v23, v21, 1\n" + "vand.vx v22, v22, %[x1d]\n" + "vxor.vv v23, v23, v22\n" + "vle8.v v22, (%[wd5])\n" + "vxor.vv v21, v23, v22\n" + "vxor.vv v20, v20, v22\n" + + "vsra.vi v26, v25, 7\n" + "vsll.vi v27, v25, 1\n" + "vand.vx v26, v26, %[x1d]\n" + "vxor.vv v27, v27, v26\n" + "vle8.v v26, (%[wd6])\n" + "vxor.vv v25, v27, v26\n" + "vxor.vv v24, v24, v26\n" + + "vsra.vi v30, v29, 7\n" + "vsll.vi v31, v29, 1\n" + "vand.vx v30, v30, %[x1d]\n" + "vxor.vv v31, v31, v30\n" + "vle8.v v30, (%[wd7])\n" + "vxor.vv v29, v31, v30\n" + "vxor.vv v28, v28, v30\n" + ".option pop\n" + : : + [wd0]"r"(&dptr[z][d + 0 * NSIZE]), + [wd1]"r"(&dptr[z][d + 1 * NSIZE]), + [wd2]"r"(&dptr[z][d + 2 * NSIZE]), + [wd3]"r"(&dptr[z][d + 3 * NSIZE]), + [wd4]"r"(&dptr[z][d + 4 * NSIZE]), + [wd5]"r"(&dptr[z][d + 5 * NSIZE]), + [wd6]"r"(&dptr[z][d + 6 * NSIZE]), + [wd7]"r"(&dptr[z][d + 7 * NSIZE]), + [x1d]"r"(0x1d) + ); + } + + /* P/Q left side optimization */ + for (z = start - 1; z >= 0; z--) { + /* + * w2$$ = MASK(wq$$); + * w1$$ = SHLBYTE(wq$$); + * w2$$ &= NBYTES(0x1d); + * wq$$ = w1$$ ^ w2$$; + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vsra.vi v2, v1, 7\n" + "vsll.vi v3, v1, 1\n" + "vand.vx v2, v2, %[x1d]\n" + "vxor.vv v1, v3, v2\n" + + "vsra.vi v6, v5, 7\n" + "vsll.vi v7, v5, 1\n" + "vand.vx v6, v6, %[x1d]\n" + "vxor.vv v5, v7, v6\n" + + "vsra.vi v10, v9, 7\n" + "vsll.vi v11, v9, 1\n" + "vand.vx v10, v10, %[x1d]\n" + "vxor.vv v9, v11, v10\n" + + "vsra.vi v14, v13, 7\n" + "vsll.vi v15, v13, 1\n" + "vand.vx v14, v14, %[x1d]\n" + "vxor.vv v13, v15, v14\n" + + "vsra.vi v18, v17, 7\n" + "vsll.vi v19, v17, 1\n" + "vand.vx v18, v18, %[x1d]\n" + "vxor.vv v17, v19, v18\n" + + "vsra.vi v22, v21, 7\n" + "vsll.vi v23, v21, 1\n" + "vand.vx v22, v22, %[x1d]\n" + "vxor.vv v21, v23, v22\n" + + "vsra.vi v26, v25, 7\n" + "vsll.vi v27, v25, 1\n" + "vand.vx v26, v26, %[x1d]\n" + "vxor.vv v25, v27, v26\n" + + "vsra.vi v30, v29, 7\n" + "vsll.vi v31, v29, 1\n" + "vand.vx v30, v30, %[x1d]\n" + "vxor.vv v29, v31, v30\n" + ".option pop\n" + : : + [x1d]"r"(0x1d) + ); + } + + /* + * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + * v0:wp0, v1:wq0, v2:p0, v3:q0 + * v4:wp1, v5:wq1, v6:p1, v7:q1 + * v8:wp2, v9:wq2, v10:p2, v11:q2 + * v12:wp3, v13:wq3, v14:p3, v15:q3 + * v16:wp4, v17:wq4, v18:p4, v19:q4 + * v20:wp5, v21:wq5, v22:p5, v23:q5 + * v24:wp6, v25:wq6, v26:p6, v27:q6 + * v28:wp7, v29:wq7, v30:p7, v31:q7 + */ + asm volatile (".option push\n" + ".option arch,+v\n" + "vle8.v v2, (%[wp0])\n" + "vle8.v v3, (%[wq0])\n" + "vxor.vv v2, v2, v0\n" + "vxor.vv v3, v3, 
v1\n" + "vse8.v v2, (%[wp0])\n" + "vse8.v v3, (%[wq0])\n" + + "vle8.v v6, (%[wp1])\n" + "vle8.v v7, (%[wq1])\n" + "vxor.vv v6, v6, v4\n" + "vxor.vv v7, v7, v5\n" + "vse8.v v6, (%[wp1])\n" + "vse8.v v7, (%[wq1])\n" + + "vle8.v v10, (%[wp2])\n" + "vle8.v v11, (%[wq2])\n" + "vxor.vv v10, v10, v8\n" + "vxor.vv v11, v11, v9\n" + "vse8.v v10, (%[wp2])\n" + "vse8.v v11, (%[wq2])\n" + + "vle8.v v14, (%[wp3])\n" + "vle8.v v15, (%[wq3])\n" + "vxor.vv v14, v14, v12\n" + "vxor.vv v15, v15, v13\n" + "vse8.v v14, (%[wp3])\n" + "vse8.v v15, (%[wq3])\n" + + "vle8.v v18, (%[wp4])\n" + "vle8.v v19, (%[wq4])\n" + "vxor.vv v18, v18, v16\n" + "vxor.vv v19, v19, v17\n" + "vse8.v v18, (%[wp4])\n" + "vse8.v v19, (%[wq4])\n" + + "vle8.v v22, (%[wp5])\n" + "vle8.v v23, (%[wq5])\n" + "vxor.vv v22, v22, v20\n" + "vxor.vv v23, v23, v21\n" + "vse8.v v22, (%[wp5])\n" + "vse8.v v23, (%[wq5])\n" + + "vle8.v v26, (%[wp6])\n" + "vle8.v v27, (%[wq6])\n" + "vxor.vv v26, v26, v24\n" + "vxor.vv v27, v27, v25\n" + "vse8.v v26, (%[wp6])\n" + "vse8.v v27, (%[wq6])\n" + + "vle8.v v30, (%[wp7])\n" + "vle8.v v31, (%[wq7])\n" + "vxor.vv v30, v30, v28\n" + "vxor.vv v31, v31, v29\n" + "vse8.v v30, (%[wp7])\n" + "vse8.v v31, (%[wq7])\n" + ".option pop\n" + : : + [wp0]"r"(&p[d + NSIZE * 0]), + [wq0]"r"(&q[d + NSIZE * 0]), + [wp1]"r"(&p[d + NSIZE * 1]), + [wq1]"r"(&q[d + NSIZE * 1]), + [wp2]"r"(&p[d + NSIZE * 2]), + [wq2]"r"(&q[d + NSIZE * 2]), + [wp3]"r"(&p[d + NSIZE * 3]), + [wq3]"r"(&q[d + NSIZE * 3]), + [wp4]"r"(&p[d + NSIZE * 4]), + [wq4]"r"(&q[d + NSIZE * 4]), + [wp5]"r"(&p[d + NSIZE * 5]), + [wq5]"r"(&q[d + NSIZE * 5]), + [wp6]"r"(&p[d + NSIZE * 6]), + [wq6]"r"(&q[d + NSIZE * 6]), + [wp7]"r"(&p[d + NSIZE * 7]), + [wq7]"r"(&q[d + NSIZE * 7]) + ); + } +} + +RAID6_RVV_WRAPPER(1); +RAID6_RVV_WRAPPER(2); +RAID6_RVV_WRAPPER(4); +RAID6_RVV_WRAPPER(8); diff --git a/lib/raid6/rvv.h b/lib/raid6/rvv.h new file mode 100644 index 000000000000..94044a1b707b --- /dev/null +++ b/lib/raid6/rvv.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2024 Institute of Software, CAS. + * + * raid6/rvv.h + * + * Definitions for RISC-V RAID-6 code + */ + +#define RAID6_RVV_WRAPPER(_n) \ + static void raid6_rvv ## _n ## _gen_syndrome(int disks, \ + size_t bytes, void **ptrs) \ + { \ + void raid6_rvv ## _n ## _gen_syndrome_real(int d, \ + unsigned long b, void **p); \ + kernel_vector_begin(); \ + raid6_rvv ## _n ## _gen_syndrome_real(disks, \ + (unsigned long)bytes, ptrs); \ + kernel_vector_end(); \ + } \ + static void raid6_rvv ## _n ## _xor_syndrome(int disks, \ + int start, int stop, \ + size_t bytes, void **ptrs) \ + { \ + void raid6_rvv ## _n ## _xor_syndrome_real(int d, \ + int s1, int s2, \ + unsigned long b, void **p); \ + kernel_vector_begin(); \ + raid6_rvv ## _n ## _xor_syndrome_real(disks, \ + start, stop, (unsigned long)bytes, ptrs); \ + kernel_vector_end(); \ + } \ + struct raid6_calls const raid6_rvvx ## _n = { \ + raid6_rvv ## _n ## _gen_syndrome, \ + raid6_rvv ## _n ## _xor_syndrome, \ + rvv_has_vector, \ + "rvvx" #_n, \ + 0 \ + } From a869b8c29f864b9530a6473a30c09546333b571a Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Sat, 26 Apr 2025 21:59:54 +0800 Subject: [PATCH 67/69] riscv: enable mseal sysmap for RV64 Provide support for CONFIG_MSEAL_SYSTEM_MAPPINGS for RV64, covering the vdso, vvar. Passed sysmap_is_sealed and mseal_test self tests. Passed booting a buildroot rootfs image and a cli debian rootfs image. 
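One way to observe the sealing from user space (a sketch only, not part of the patch; the mseal_test selftest mentioned above covers this more thoroughly) is to try to downgrade the vDSO's protection and check that the kernel refuses. On a kernel with this support wired up, the mprotect() call should fail with EPERM.

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/auxv.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            /* AT_SYSINFO_EHDR points at the (page-aligned) vDSO image. */
            unsigned long vdso = getauxval(AT_SYSINFO_EHDR);
            long page = sysconf(_SC_PAGESIZE);

            if (!vdso || page <= 0)
                    return 1;

            /* On a sealed mapping this mprotect() should fail with EPERM. */
            if (mprotect((void *)vdso, (size_t)page, PROT_READ) == -1)
                    printf("vdso is sealed: %s\n", strerror(errno));
            else
                    printf("vdso protection changed: mapping is not sealed\n");
            return 0;
    }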
Signed-off-by: Jisheng Zhang Cc: Jeff Xu Link: https://lore.kernel.org/r/20250426135954.5614-1-jszhang@kernel.org Tested-by: Alexandre Ghiti Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/kernel/vdso.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bbec87b79309..3cb0b05eef62 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -70,6 +70,7 @@ config RISCV # LLD >= 14: https://github.com/llvm/llvm-project/issues/50505 select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000 select ARCH_SUPPORTS_LTO_CLANG_THIN if LLD_VERSION >= 140000 + select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS if 64BIT && MMU select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU select ARCH_SUPPORTS_PER_VMA_LOCK if MMU select ARCH_SUPPORTS_RT diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index cc2895d1fbc2..3a8e038b10a2 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -136,7 +136,7 @@ static int __setup_additional_pages(struct mm_struct *mm, ret = _install_special_mapping(mm, vdso_base, vdso_text_len, - (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC), + (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_SEALED_SYSMAP), vdso_info->cm); if (IS_ERR(ret)) From ee0d03053e7009a3a3532fb37f6c94bfa0a8cca3 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Fri, 11 Apr 2025 10:46:00 +0800 Subject: [PATCH 68/69] RISC-V: vDSO: Wire up getrandom() vDSO implementation Hook up the generic vDSO implementation to the generic vDSO getrandom implementation by providing the required __arch_chacha20_blocks_nostack and getrandom_syscall implementations. Also wire up the selftests. The benchmark result: vdso: 25000000 times in 2.466341333 seconds libc: 25000000 times in 41.447720005 seconds syscall: 25000000 times in 41.043926672 seconds vdso: 25000000 x 256 times in 162.286219353 seconds libc: 25000000 x 256 times in 2953.855018685 seconds syscall: 25000000 x 256 times in 2796.268546000 seconds [ alex: - Fix dynamic relocation - Squash Nathan's fix https://lore.kernel.org/all/20250423-riscv-fix-compat_vdso-lld-v2-1-b7bbbc244501@kernel.org/ - Add comment from Loongarch ] Signed-off-by: Xi Ruoyao Link: https://lore.kernel.org/r/20250411024600.16045-1-xry111@xry111.site Tested-by: Alexandre Ghiti Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/vdso/getrandom.h | 30 +++ arch/riscv/kernel/vdso/Makefile | 13 + arch/riscv/kernel/vdso/getrandom.c | 10 + arch/riscv/kernel/vdso/vdso.lds.S | 3 + arch/riscv/kernel/vdso/vgetrandom-chacha.S | 249 ++++++++++++++++++ .../selftests/vDSO/vgetrandom-chacha.S | 2 + 7 files changed, 308 insertions(+) create mode 100644 arch/riscv/include/asm/vdso/getrandom.h create mode 100644 arch/riscv/kernel/vdso/getrandom.c create mode 100644 arch/riscv/kernel/vdso/vgetrandom-chacha.S diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 3cb0b05eef62..b5b3ead92f64 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -219,6 +219,7 @@ config RISCV select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT select UACCESS_MEMCPY if !MMU + select VDSO_GETRANDOM if HAVE_GENERIC_VDSO select USER_STACKTRACE_SUPPORT select ZONE_DMA32 if 64BIT diff --git a/arch/riscv/include/asm/vdso/getrandom.h b/arch/riscv/include/asm/vdso/getrandom.h new file mode 100644 index 000000000000..8dc92441702a --- /dev/null +++ b/arch/riscv/include/asm/vdso/getrandom.h @@ -0,0 +1,30 @@ +/* 
SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025 Xi Ruoyao . All Rights Reserved. + */ +#ifndef __ASM_VDSO_GETRANDOM_H +#define __ASM_VDSO_GETRANDOM_H + +#ifndef __ASSEMBLY__ + +#include + +static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags) +{ + register long ret asm("a0"); + register long nr asm("a7") = __NR_getrandom; + register void *buffer asm("a0") = _buffer; + register size_t len asm("a1") = _len; + register unsigned int flags asm("a2") = _flags; + + asm volatile ("ecall\n" + : "+r" (ret) + : "r" (nr), "r" (buffer), "r" (len), "r" (flags) + : "memory"); + + return ret; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index ad73607abc28..dca888852d93 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -13,9 +13,17 @@ vdso-syms += flush_icache vdso-syms += hwprobe vdso-syms += sys_hwprobe +ifdef CONFIG_VDSO_GETRANDOM +vdso-syms += getrandom +endif + # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o +ifdef CONFIG_VDSO_GETRANDOM +obj-vdso += vgetrandom-chacha.o +endif + ccflags-y := -fno-stack-protector ccflags-y += -DDISABLE_BRANCH_PROFILING ccflags-y += -fno-builtin @@ -24,6 +32,10 @@ ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) endif +ifneq ($(c-getrandom-y),) + CFLAGS_getrandom.o += -fPIC -include $(c-getrandom-y) +endif + CFLAGS_hwprobe.o += -fPIC # Build rules @@ -38,6 +50,7 @@ endif # Disable -pg to prevent insert call site CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) +CFLAGS_REMOVE_getrandom.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) # Force dependency diff --git a/arch/riscv/kernel/vdso/getrandom.c b/arch/riscv/kernel/vdso/getrandom.c new file mode 100644 index 000000000000..f21922e8cebd --- /dev/null +++ b/arch/riscv/kernel/vdso/getrandom.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Xi Ruoyao . All Rights Reserved. + */ +#include + +ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) +{ + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); +} diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 8e86965a8aae..7c15b0f4ee3b 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -79,6 +79,9 @@ VERSION __vdso_flush_icache; #ifndef COMPAT_VDSO __vdso_riscv_hwprobe; +#endif +#if defined(CONFIG_VDSO_GETRANDOM) && !defined(COMPAT_VDSO) + __vdso_getrandom; #endif local: *; }; diff --git a/arch/riscv/kernel/vdso/vgetrandom-chacha.S b/arch/riscv/kernel/vdso/vgetrandom-chacha.S new file mode 100644 index 000000000000..5f0dad8f2373 --- /dev/null +++ b/arch/riscv/kernel/vdso/vgetrandom-chacha.S @@ -0,0 +1,249 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025 Xi Ruoyao . All Rights Reserved. + * + * Based on arch/loongarch/vdso/vgetrandom-chacha.S. 
diff --git a/arch/riscv/kernel/vdso/vgetrandom-chacha.S b/arch/riscv/kernel/vdso/vgetrandom-chacha.S
new file mode 100644
index 000000000000..5f0dad8f2373
--- /dev/null
+++ b/arch/riscv/kernel/vdso/vgetrandom-chacha.S
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
+ *
+ * Based on arch/loongarch/vdso/vgetrandom-chacha.S.
+ */
+
+#include <asm/asm.h>
+#include <linux/linkage.h>
+
+.text
+
+/*
+ * ChaCha20 rotates left by 16/12/8/7.  A 32-bit rotate left by N is the
+ * same as a rotate right by 32 - N, hence the ROTRI amounts 16/20/24/25
+ * used below.
+ */
+.macro ROTRI rd rs imm
+	slliw	t0, \rs, 32 - \imm
+	srliw	\rd, \rs, \imm
+	or	\rd, \rd, t0
+.endm
+
+.macro OP_4REG op d0 d1 d2 d3 s0 s1 s2 s3
+	\op	\d0, \d0, \s0
+	\op	\d1, \d1, \s1
+	\op	\d2, \d2, \s2
+	\op	\d3, \d3, \s3
+.endm
+
+/*
+ * a0: output bytes
+ * a1: 32-byte key input
+ * a2: 8-byte counter input/output
+ * a3: number of 64-byte blocks to write to output
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+
+#define output		a0
+#define key		a1
+#define counter		a2
+#define nblocks		a3
+#define i		a4
+#define state0		s0
+#define state1		s1
+#define state2		s2
+#define state3		s3
+#define state4		s4
+#define state5		s5
+#define state6		s6
+#define state7		s7
+#define state8		s8
+#define state9		s9
+#define state10		s10
+#define state11		s11
+#define state12		a5
+#define state13		a6
+#define state14		a7
+#define state15		t1
+#define cnt		t2
+#define copy0		t3
+#define copy1		t4
+#define copy2		t5
+#define copy3		t6
+
+/* Packs to be used with OP_4REG */
+#define line0		state0, state1, state2, state3
+#define line1		state4, state5, state6, state7
+#define line2		state8, state9, state10, state11
+#define line3		state12, state13, state14, state15
+
+#define line1_perm	state5, state6, state7, state4
+#define line2_perm	state10, state11, state8, state9
+#define line3_perm	state15, state12, state13, state14
+
+#define copy		copy0, copy1, copy2, copy3
+
+#define _16		16, 16, 16, 16
+#define _20		20, 20, 20, 20
+#define _24		24, 24, 24, 24
+#define _25		25, 25, 25, 25
+
+	/*
+	 * The ABI requires s0-s11 saved.
+	 * This does not violate the stack-less requirement: no sensitive data
+	 * is spilled onto the stack.
+	 */
+	addi	sp, sp, -12*SZREG
+	REG_S	s0, (sp)
+	REG_S	s1, SZREG(sp)
+	REG_S	s2, 2*SZREG(sp)
+	REG_S	s3, 3*SZREG(sp)
+	REG_S	s4, 4*SZREG(sp)
+	REG_S	s5, 5*SZREG(sp)
+	REG_S	s6, 6*SZREG(sp)
+	REG_S	s7, 7*SZREG(sp)
+	REG_S	s8, 8*SZREG(sp)
+	REG_S	s9, 9*SZREG(sp)
+	REG_S	s10, 10*SZREG(sp)
+	REG_S	s11, 11*SZREG(sp)
+
+	ld	cnt, (counter)
+
+	li	copy0, 0x61707865
+	li	copy1, 0x3320646e
+	li	copy2, 0x79622d32
+	li	copy3, 0x6b206574
+
+.Lblock:
+	/* state[0,1,2,3] = "expand 32-byte k" */
+	mv	state0, copy0
+	mv	state1, copy1
+	mv	state2, copy2
+	mv	state3, copy3
+
+	/* state[4,5,..,11] = key */
+	lw	state4, (key)
+	lw	state5, 4(key)
+	lw	state6, 8(key)
+	lw	state7, 12(key)
+	lw	state8, 16(key)
+	lw	state9, 20(key)
+	lw	state10, 24(key)
+	lw	state11, 28(key)
+
+	/* state[12,13] = counter */
+	mv	state12, cnt
+	srli	state13, cnt, 32
+
+	/* state[14,15] = 0 */
+	mv	state14, zero
+	mv	state15, zero
+
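+	/*
+	 * Each iteration of the loop below is one ChaCha20 double round:
+	 * the odd round mixes the four columns (line0..line3) and the even
+	 * round the four diagonals, which the *_perm packs select by
+	 * rotating lines 1-3.  Ten double rounds give the full 20 rounds.
+	 */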
+	li	i, 10
+.Lpermute:
+	/* odd round */
+	OP_4REG	addw line0, line1
+	OP_4REG	xor line3, line0
+	OP_4REG	ROTRI line3, _16
+
+	OP_4REG	addw line2, line3
+	OP_4REG	xor line1, line2
+	OP_4REG	ROTRI line1, _20
+
+	OP_4REG	addw line0, line1
+	OP_4REG	xor line3, line0
+	OP_4REG	ROTRI line3, _24
+
+	OP_4REG	addw line2, line3
+	OP_4REG	xor line1, line2
+	OP_4REG	ROTRI line1, _25
+
+	/* even round */
+	OP_4REG	addw line0, line1_perm
+	OP_4REG	xor line3_perm, line0
+	OP_4REG	ROTRI line3_perm, _16
+
+	OP_4REG	addw line2_perm, line3_perm
+	OP_4REG	xor line1_perm, line2_perm
+	OP_4REG	ROTRI line1_perm, _20
+
+	OP_4REG	addw line0, line1_perm
+	OP_4REG	xor line3_perm, line0
+	OP_4REG	ROTRI line3_perm, _24
+
+	OP_4REG	addw line2_perm, line3_perm
+	OP_4REG	xor line1_perm, line2_perm
+	OP_4REG	ROTRI line1_perm, _25
+
+	addi	i, i, -1
+	bnez	i, .Lpermute
+
+	/* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */
+	OP_4REG	addw line0, copy
+	sw	state0, (output)
+	sw	state1, 4(output)
+	sw	state2, 8(output)
+	sw	state3, 12(output)
+
+	/* from now on state[0,1,2,3] are scratch registers */
+
+	/* state[0,1,2,3] = lo(key) */
+	lw	state0, (key)
+	lw	state1, 4(key)
+	lw	state2, 8(key)
+	lw	state3, 12(key)
+
+	/* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */
+	OP_4REG	addw line1, line0
+	sw	state4, 16(output)
+	sw	state5, 20(output)
+	sw	state6, 24(output)
+	sw	state7, 28(output)
+
+	/* state[0,1,2,3] = hi(key) */
+	lw	state0, 16(key)
+	lw	state1, 20(key)
+	lw	state2, 24(key)
+	lw	state3, 28(key)
+
+	/* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */
+	OP_4REG	addw line2, line0
+	sw	state8, 32(output)
+	sw	state9, 36(output)
+	sw	state10, 40(output)
+	sw	state11, 44(output)
+
+	/* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */
+	addw	state12, state12, cnt
+	srli	state0, cnt, 32
+	addw	state13, state13, state0
+	sw	state12, 48(output)
+	sw	state13, 52(output)
+	sw	state14, 56(output)
+	sw	state15, 60(output)
+
+	/* ++counter */
+	addi	cnt, cnt, 1
+
+	/* output += 64 */
+	addi	output, output, 64
+	/* --nblocks */
+	addi	nblocks, nblocks, -1
+	bnez	nblocks, .Lblock
+
+	/* counter = [cnt_lo, cnt_hi] */
+	sd	cnt, (counter)
+
+	/* Zero out the potentially sensitive regs, in case nothing uses these
+	 * again.  As of now copy[0,1,2,3] just contains "expand 32-byte k"
+	 * and state[0,...,11] live in s0-s11, which we restore in the
+	 * epilogue, so we only need to zero state[12,...,15].
+	 */
+	mv	state12, zero
+	mv	state13, zero
+	mv	state14, zero
+	mv	state15, zero
+
+	REG_L	s0, (sp)
+	REG_L	s1, SZREG(sp)
+	REG_L	s2, 2*SZREG(sp)
+	REG_L	s3, 3*SZREG(sp)
+	REG_L	s4, 4*SZREG(sp)
+	REG_L	s5, 5*SZREG(sp)
+	REG_L	s6, 6*SZREG(sp)
+	REG_L	s7, 7*SZREG(sp)
+	REG_L	s8, 8*SZREG(sp)
+	REG_L	s9, 9*SZREG(sp)
+	REG_L	s10, 10*SZREG(sp)
+	REG_L	s11, 11*SZREG(sp)
+	addi	sp, sp, 12*SZREG
+
+	ret
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
diff --git a/tools/testing/selftests/vDSO/vgetrandom-chacha.S b/tools/testing/selftests/vDSO/vgetrandom-chacha.S
index d6e09af7c0a9..a4a82e1c28a9 100644
--- a/tools/testing/selftests/vDSO/vgetrandom-chacha.S
+++ b/tools/testing/selftests/vDSO/vgetrandom-chacha.S
@@ -11,6 +11,8 @@
 #include "../../../../arch/loongarch/vdso/vgetrandom-chacha.S"
 #elif defined(__powerpc__) || defined(__powerpc64__)
 #include "../../../../arch/powerpc/kernel/vdso/vgetrandom-chacha.S"
+#elif defined(__riscv) && __riscv_xlen == 64
+#include "../../../../arch/riscv/kernel/vdso/vgetrandom-chacha.S"
 #elif defined(__s390x__)
 #include "../../../../arch/s390/kernel/vdso64/vgetrandom-chacha.S"
 #elif defined(__x86_64__)

From 265d6aba165c500389c80d394ac247460c443ef5 Mon Sep 17 00:00:00 2001
From: Cyril Bur <cyrilbur@tenstorrent.com>
Date: Mon, 2 Jun 2025 12:15:43 +0000
Subject: [PATCH 69/69] riscv: uaccess: Only restore the CSR_STATUS SUM bit

During __switch_to(), csrs will OR the value of its source register into
the corresponding CSR. In this case we are only interested in restoring
the SUM bit, not the entire register.
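
To illustrate (a sketch, not part of the change): csrs is a
read-modify-write "set bits" operation, so restoring the whole saved
status word would also OR back unrelated bits captured at save time,
e.g. stale SR_FS dirty bits. Masking first restores SUM alone:

	/* "csrs CSR_STATUS, s0" behaves as CSR_STATUS |= s0 */
	REG_L	s0, TASK_THREAD_SUM_RA(a4)	/* saved status word */
	li	s1, SR_SUM
	and	s0, s0, s1			/* keep only SR_SUM */
	csrs	CSR_STATUS, s0			/* set SUM iff saved set */
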
Signed-off-by: Cyril Bur <cyrilbur@tenstorrent.com>
Link: https://lore.kernel.org/r/20250522160954.429333-1-cyrilbur@tenstorrent.com
Co-developed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Fixes: 788aa64c01f1 ("riscv: save the SR_SUM status over switches")
Link: https://lore.kernel.org/r/20250602121543.1544278-1-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/include/asm/processor.h | 2 +-
 arch/riscv/kernel/asm-offsets.c    | 6 +++---
 arch/riscv/kernel/entry.S          | 9 +++++----
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 8111250f3c1b..24d3af4d3807 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -110,7 +110,7 @@ struct thread_struct {
 	struct __riscv_d_ext_state fstate;
 	unsigned long bad_cause;
 	unsigned long envcfg;
-	unsigned long status;
+	unsigned long sum;
 	u32 riscv_v_flags;
 	u32 vstate_ctrl;
 	struct __riscv_v_ext_state vstate;
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 9420ec6a50fd..6e8c0d6feae9 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -34,7 +34,7 @@ void asm_offsets(void)
 	OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]);
 	OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]);
 	OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]);
-	OFFSET(TASK_THREAD_STATUS, task_struct, thread.status);
+	OFFSET(TASK_THREAD_SUM, task_struct, thread.sum);

 	OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
 	OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
@@ -347,8 +347,8 @@ void asm_offsets(void)
 		  offsetof(struct task_struct, thread.s[11])
 		- offsetof(struct task_struct, thread.ra)
 	);
-	DEFINE(TASK_THREAD_STATUS_RA,
-		  offsetof(struct task_struct, thread.status)
+	DEFINE(TASK_THREAD_SUM_RA,
+		  offsetof(struct task_struct, thread.sum)
 		- offsetof(struct task_struct, thread.ra)
 	);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 00bd0de9faa2..a49e19ce3a97 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -399,14 +399,15 @@ SYM_FUNC_START(__switch_to)
 	REG_S s11, TASK_THREAD_S11_RA(a3)

 	/* save the user space access flag */
-	li	s0, SR_SUM
-	csrr	s1, CSR_STATUS
-	REG_S	s1, TASK_THREAD_STATUS_RA(a3)
+	csrr	s0, CSR_STATUS
+	REG_S	s0, TASK_THREAD_SUM_RA(a3)

 	/* Save the kernel shadow call stack pointer */
 	scs_save_current
 	/* Restore context from next->thread */
-	REG_L	s0, TASK_THREAD_STATUS_RA(a4)
+	REG_L	s0, TASK_THREAD_SUM_RA(a4)
+	li	s1, SR_SUM
+	and	s0, s0, s1
 	csrs	CSR_STATUS, s0
 	REG_L	ra, TASK_THREAD_RA_RA(a4)
 	REG_L	sp, TASK_THREAD_SP_RA(a4)