From e59236b5a09e168fdf961a10d2519cef44f5d6b4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 11:38:38 -0700 Subject: [PATCH 1/2] x86/sgx: Use SHA-256 library API instead of crypto_shash API This user of SHA-256 does not support any other algorithm, so the crypto_shash abstraction provides no value. Just use the SHA-256 library API instead, which is much simpler and easier to use. Signed-off-by: Eric Biggers Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20250428183838.799333-1-ebiggers%40kernel.org --- arch/x86/Kconfig | 3 +-- arch/x86/kernel/cpu/sgx/driver.h | 1 - arch/x86/kernel/cpu/sgx/ioctl.c | 30 ++---------------------------- 3 files changed, 3 insertions(+), 31 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4b9f378e05f6..6eb0ebbeab6d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1881,8 +1881,7 @@ endchoice config X86_SGX bool "Software Guard eXtensions (SGX)" depends on X86_64 && CPU_SUP_INTEL && X86_X2APIC - depends on CRYPTO=y - depends on CRYPTO_SHA256=y + select CRYPTO_LIB_SHA256 select MMU_NOTIFIER select NUMA_KEEP_MEMINFO if NUMA select XARRAY_MULTI diff --git a/arch/x86/kernel/cpu/sgx/driver.h b/arch/x86/kernel/cpu/sgx/driver.h index 4eddb4d571ef..30f39f92c98f 100644 --- a/arch/x86/kernel/cpu/sgx/driver.h +++ b/arch/x86/kernel/cpu/sgx/driver.h @@ -2,7 +2,6 @@ #ifndef __ARCH_SGX_DRIVER_H__ #define __ARCH_SGX_DRIVER_H__ -#include #include #include #include diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index 776a20172867..66f1efa16fbb 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -463,31 +464,6 @@ static long sgx_ioc_enclave_add_pages(struct sgx_encl *encl, void __user *arg) return ret; } -static int __sgx_get_key_hash(struct crypto_shash *tfm, const void *modulus, - void *hash) -{ - SHASH_DESC_ON_STACK(shash, tfm); - - shash->tfm = tfm; - - return crypto_shash_digest(shash, modulus, SGX_MODULUS_SIZE, hash); -} - -static int sgx_get_key_hash(const void *modulus, void *hash) -{ - struct crypto_shash *tfm; - int ret; - - tfm = crypto_alloc_shash("sha256", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - ret = __sgx_get_key_hash(tfm, modulus, hash); - - crypto_free_shash(tfm); - return ret; -} - static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct, void *token) { @@ -523,9 +499,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct, sgx_xfrm_reserved_mask) return -EINVAL; - ret = sgx_get_key_hash(sigstruct->modulus, mrsigner); - if (ret) - return ret; + sha256(sigstruct->modulus, SGX_MODULUS_SIZE, (u8 *)mrsigner); mutex_lock(&encl->lock); From ed16618c380c32c68c06186d0ccbb0d5e0586e59 Mon Sep 17 00:00:00 2001 From: Andrew Zaborowski Date: Fri, 9 May 2025 01:04:29 +0200 Subject: [PATCH 2/2] x86/sgx: Prevent attempts to reclaim poisoned pages TL;DR: SGX page reclaim touches the page to copy its contents to secondary storage. SGX instructions do not gracefully handle machine checks. Despite this, the existing SGX code will try to reclaim pages that it _knows_ are poisoned. Avoid even trying to reclaim poisoned pages. The longer story: Pages used by an enclave only get epc_page->poison set in arch_memory_failure() but they currently stay on sgx_active_page_list until sgx_encl_release(), with the SGX_EPC_PAGE_RECLAIMER_TRACKED flag untouched. epc_page->poison is not checked in the reclaimer logic meaning that, if other conditions are met, an attempt will be made to reclaim an EPC page that was poisoned. This is bad because 1. we don't want that page to end up added to another enclave and 2. it is likely to cause one core to shut down and the kernel to panic. Specifically, reclaiming uses microcode operations including "EWB" which accesses the EPC page contents to encrypt and write them out to non-SGX memory. Those operations cannot handle MCEs in their accesses other than by putting the executing core into a special shutdown state (affecting both threads with HT.) The kernel will subsequently panic on the remaining cores seeing the core didn't enter MCE handler(s) in time. Call sgx_unmark_page_reclaimable() to remove the affected EPC page from sgx_active_page_list on memory error to stop it being considered for reclaiming. Testing epc_page->poison in sgx_reclaim_pages() would also work but I assume it's better to add code in the less likely paths. The affected EPC page is not added to &node->sgx_poison_page_list until later in sgx_encl_release()->sgx_free_epc_page() when it is EREMOVEd. Membership on other lists doesn't change to avoid changing any of the lists' semantics except for sgx_active_page_list. There's a "TBD" comment in arch_memory_failure() about pre-emptive actions, the goal here is not to address everything that it may imply. This also doesn't completely close the time window when a memory error notification will be fatal (for a not previously poisoned EPC page) -- the MCE can happen after sgx_reclaim_pages() has selected its candidates or even *inside* a microcode operation (actually easy to trigger due to the amount of time spent in them.) The spinlock in sgx_unmark_page_reclaimable() is safe because memory_failure() runs in process context and no spinlocks are held, explicitly noted in a mm/memory-failure.c comment. Signed-off-by: Andrew Zaborowski Signed-off-by: Ingo Molnar Acked-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Tony Luck Cc: balrogg@gmail.com Cc: linux-sgx@vger.kernel.org Link: https://lore.kernel.org/r/20250508230429.456271-1-andrew.zaborowski@intel.com --- arch/x86/kernel/cpu/sgx/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 8ce352fc72ac..7c199773705a 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -719,6 +719,8 @@ int arch_memory_failure(unsigned long pfn, int flags) goto out; } + sgx_unmark_page_reclaimable(page); + /* * TBD: Add additional plumbing to enable pre-emptive * action for asynchronous poison notification. Until