forked from mirrors/linux
		
	arm64: split thread_info from task stack
This patch moves arm64's struct thread_info from the task stack into task_struct. This protects thread_info from corruption in the case of stack overflows, and makes its address harder to determine if stack addresses are leaked, making a number of attacks more difficult. Precise detection and handling of overflow is left for subsequent patches.

Largely, this involves changing code to store the task_struct in sp_el0, and acquire the thread_info from the task struct. Core code now implements current_thread_info(), and as noted in <linux/sched.h> this relies on offsetof(task_struct, thread_info) == 0, enforced by core code.

This change means that the 'tsk' register used in entry.S now points to a task_struct, rather than a thread_info as it used to. To make this clear, the TI_* field offsets are renamed to TSK_TI_*, with asm-offsets appropriately updated to account for the structural change.

Userspace clobbers sp_el0, and we can no longer restore this from the stack. Instead, the current task is cached in a per-cpu variable that we can safely access from early assembly as interrupts are disabled (and we are thus not preemptible).

Both secondary entry and idle are updated to stash the sp and task pointer separately.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: James Morse <james.morse@arm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
This commit is contained in:
		
							parent
							
								
									1b7e2296a8
								
							
						
					
					
						commit
						c02433dd6d
					
				
					 10 changed files with 73 additions and 54 deletions
				
			
		| 
						 | 
				
			
			@ -109,6 +109,7 @@ config ARM64
 | 
			
		|||
	select POWER_SUPPLY
 | 
			
		||||
	select SPARSE_IRQ
 | 
			
		||||
	select SYSCTL_EXCEPTION_TRACE
 | 
			
		||||
	select THREAD_INFO_IN_TASK
 | 
			
		||||
	help
 | 
			
		||||
	  ARM 64-bit (AArch64) Linux support.
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,6 @@
 | 
			
		|||
generic-y += bugs.h
 | 
			
		||||
generic-y += clkdev.h
 | 
			
		||||
generic-y += cputime.h
 | 
			
		||||
generic-y += current.h
 | 
			
		||||
generic-y += delay.h
 | 
			
		||||
generic-y += div64.h
 | 
			
		||||
generic-y += dma.h
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										22
									
								
								arch/arm64/include/asm/current.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								arch/arm64/include/asm/current.h
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,22 @@
 | 
			
		|||
#ifndef __ASM_CURRENT_H
 | 
			
		||||
#define __ASM_CURRENT_H
 | 
			
		||||
 | 
			
		||||
#include <linux/compiler.h>
 | 
			
		||||
 | 
			
		||||
#include <asm/sysreg.h>
 | 
			
		||||
 | 
			
		||||
#ifndef __ASSEMBLY__
 | 
			
		||||
 | 
			
		||||
struct task_struct;
 | 
			
		||||
 | 
			
		||||
static __always_inline struct task_struct *get_current(void)
 | 
			
		||||
{
 | 
			
		||||
	return (struct task_struct *)read_sysreg(sp_el0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define current get_current()
 | 
			
		||||
 | 
			
		||||
#endif /* __ASSEMBLY__ */
 | 
			
		||||
 | 
			
		||||
#endif /* __ASM_CURRENT_H */
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -82,6 +82,7 @@ asmlinkage void secondary_start_kernel(void);
 | 
			
		|||
 */
 | 
			
		||||
struct secondary_data {
 | 
			
		||||
	void *stack;
 | 
			
		||||
	struct task_struct *task;
 | 
			
		||||
	long status;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -47,41 +47,17 @@ typedef unsigned long mm_segment_t;
 | 
			
		|||
struct thread_info {
 | 
			
		||||
	unsigned long		flags;		/* low level flags */
 | 
			
		||||
	mm_segment_t		addr_limit;	/* address limit */
 | 
			
		||||
	struct task_struct	*task;		/* main task structure */
 | 
			
		||||
	int			preempt_count;	/* 0 => preemptable, <0 => bug */
 | 
			
		||||
	int			cpu;		/* cpu */
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#define INIT_THREAD_INFO(tsk)						\
 | 
			
		||||
{									\
 | 
			
		||||
	.task		= &tsk,						\
 | 
			
		||||
	.flags		= 0,						\
 | 
			
		||||
	.preempt_count	= INIT_PREEMPT_COUNT,				\
 | 
			
		||||
	.addr_limit	= KERNEL_DS,					\
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define init_stack		(init_thread_union.stack)
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * how to get the thread information struct from C
 | 
			
		||||
 */
 | 
			
		||||
static inline struct thread_info *current_thread_info(void) __attribute_const__;
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * struct thread_info can be accessed directly via sp_el0.
 | 
			
		||||
 *
 | 
			
		||||
 * We don't use read_sysreg() as we want the compiler to cache the value where
 | 
			
		||||
 * possible.
 | 
			
		||||
 */
 | 
			
		||||
static inline struct thread_info *current_thread_info(void)
 | 
			
		||||
{
 | 
			
		||||
	unsigned long sp_el0;
 | 
			
		||||
 | 
			
		||||
	asm ("mrs %0, sp_el0" : "=r" (sp_el0));
 | 
			
		||||
 | 
			
		||||
	return (struct thread_info *)sp_el0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define thread_saved_pc(tsk)	\
 | 
			
		||||
	((unsigned long)(tsk->thread.cpu_context.pc))
 | 
			
		||||
#define thread_saved_sp(tsk)	\
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -36,9 +36,10 @@ int main(void)
 | 
			
		|||
{
 | 
			
		||||
  DEFINE(TSK_ACTIVE_MM,		offsetof(struct task_struct, active_mm));
 | 
			
		||||
  BLANK();
 | 
			
		||||
  DEFINE(TI_FLAGS,		offsetof(struct thread_info, flags));
 | 
			
		||||
  DEFINE(TI_PREEMPT,		offsetof(struct thread_info, preempt_count));
 | 
			
		||||
  DEFINE(TI_ADDR_LIMIT,		offsetof(struct thread_info, addr_limit));
 | 
			
		||||
  DEFINE(TSK_TI_FLAGS,		offsetof(struct task_struct, thread_info.flags));
 | 
			
		||||
  DEFINE(TSK_TI_PREEMPT,	offsetof(struct task_struct, thread_info.preempt_count));
 | 
			
		||||
  DEFINE(TSK_TI_ADDR_LIMIT,	offsetof(struct task_struct, thread_info.addr_limit));
 | 
			
		||||
  DEFINE(TSK_STACK,		offsetof(struct task_struct, stack));
 | 
			
		||||
  BLANK();
 | 
			
		||||
  DEFINE(THREAD_CPU_CONTEXT,	offsetof(struct task_struct, thread.cpu_context));
 | 
			
		||||
  BLANK();
 | 
			
		||||
| 
						 | 
				
			
			@ -121,6 +122,7 @@ int main(void)
 | 
			
		|||
  DEFINE(TZ_DSTTIME,		offsetof(struct timezone, tz_dsttime));
 | 
			
		||||
  BLANK();
 | 
			
		||||
  DEFINE(CPU_BOOT_STACK,	offsetof(struct secondary_data, stack));
 | 
			
		||||
  DEFINE(CPU_BOOT_TASK,		offsetof(struct secondary_data, task));
 | 
			
		||||
  BLANK();
 | 
			
		||||
#ifdef CONFIG_KVM_ARM_HOST
 | 
			
		||||
  DEFINE(VCPU_CONTEXT,		offsetof(struct kvm_vcpu, arch.ctxt));
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -90,9 +90,8 @@
 | 
			
		|||
 | 
			
		||||
	.if	\el == 0
 | 
			
		||||
	mrs	x21, sp_el0
 | 
			
		||||
	mov	tsk, sp
 | 
			
		||||
	and	tsk, tsk, #~(THREAD_SIZE - 1)	// Ensure MDSCR_EL1.SS is clear,
 | 
			
		||||
	ldr	x19, [tsk, #TI_FLAGS]		// since we can unmask debug
 | 
			
		||||
	ldr_this_cpu	tsk, __entry_task, x20	// Ensure MDSCR_EL1.SS is clear,
 | 
			
		||||
	ldr	x19, [tsk, #TSK_TI_FLAGS]	// since we can unmask debug
 | 
			
		||||
	disable_step_tsk x19, x20		// exceptions when scheduling.
 | 
			
		||||
 | 
			
		||||
	mov	x29, xzr			// fp pointed to user-space
 | 
			
		||||
| 
						 | 
				
			
			@ -100,10 +99,10 @@
 | 
			
		|||
	add	x21, sp, #S_FRAME_SIZE
 | 
			
		||||
	get_thread_info tsk
 | 
			
		||||
	/* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
 | 
			
		||||
	ldr	x20, [tsk, #TI_ADDR_LIMIT]
 | 
			
		||||
	ldr	x20, [tsk, #TSK_TI_ADDR_LIMIT]
 | 
			
		||||
	str	x20, [sp, #S_ORIG_ADDR_LIMIT]
 | 
			
		||||
	mov	x20, #TASK_SIZE_64
 | 
			
		||||
	str	x20, [tsk, #TI_ADDR_LIMIT]
 | 
			
		||||
	str	x20, [tsk, #TSK_TI_ADDR_LIMIT]
 | 
			
		||||
	/* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
 | 
			
		||||
	.endif /* \el == 0 */
 | 
			
		||||
	mrs	x22, elr_el1
 | 
			
		||||
| 
						 | 
				
			
			@ -139,7 +138,7 @@
 | 
			
		|||
	.if	\el != 0
 | 
			
		||||
	/* Restore the task's original addr_limit. */
 | 
			
		||||
	ldr	x20, [sp, #S_ORIG_ADDR_LIMIT]
 | 
			
		||||
	str	x20, [tsk, #TI_ADDR_LIMIT]
 | 
			
		||||
	str	x20, [tsk, #TSK_TI_ADDR_LIMIT]
 | 
			
		||||
 | 
			
		||||
	/* No need to restore UAO, it will be restored from SPSR_EL1 */
 | 
			
		||||
	.endif
 | 
			
		||||
| 
						 | 
				
			
			@ -192,13 +191,14 @@ alternative_else_nop_endif
 | 
			
		|||
	mov	x19, sp			// preserve the original sp
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Compare sp with the current thread_info, if the top
 | 
			
		||||
	 * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and
 | 
			
		||||
	 * should switch to the irq stack.
 | 
			
		||||
	 * Compare sp with the base of the task stack.
 | 
			
		||||
	 * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
 | 
			
		||||
	 * and should switch to the irq stack.
 | 
			
		||||
	 */
 | 
			
		||||
	and	x25, x19, #~(THREAD_SIZE - 1)
 | 
			
		||||
	cmp	x25, tsk
 | 
			
		||||
	b.ne	9998f
 | 
			
		||||
	ldr	x25, [tsk, TSK_STACK]
 | 
			
		||||
	eor	x25, x25, x19
 | 
			
		||||
	and	x25, x25, #~(THREAD_SIZE - 1)
 | 
			
		||||
	cbnz	x25, 9998f
 | 
			
		||||
 | 
			
		||||
	adr_this_cpu x25, irq_stack, x26
 | 
			
		||||
	mov	x26, #IRQ_STACK_START_SP
 | 
			
		||||
| 
						 | 
				
			
			@ -427,9 +427,9 @@ el1_irq:
 | 
			
		|||
	irq_handler
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_PREEMPT
 | 
			
		||||
	ldr	w24, [tsk, #TI_PREEMPT]		// get preempt count
 | 
			
		||||
	ldr	w24, [tsk, #TSK_TI_PREEMPT]	// get preempt count
 | 
			
		||||
	cbnz	w24, 1f				// preempt count != 0
 | 
			
		||||
	ldr	x0, [tsk, #TI_FLAGS]		// get flags
 | 
			
		||||
	ldr	x0, [tsk, #TSK_TI_FLAGS]	// get flags
 | 
			
		||||
	tbz	x0, #TIF_NEED_RESCHED, 1f	// needs rescheduling?
 | 
			
		||||
	bl	el1_preempt
 | 
			
		||||
1:
 | 
			
		||||
| 
						 | 
				
			
			@ -444,7 +444,7 @@ ENDPROC(el1_irq)
 | 
			
		|||
el1_preempt:
 | 
			
		||||
	mov	x24, lr
 | 
			
		||||
1:	bl	preempt_schedule_irq		// irq en/disable is done inside
 | 
			
		||||
	ldr	x0, [tsk, #TI_FLAGS]		// get new tasks TI_FLAGS
 | 
			
		||||
	ldr	x0, [tsk, #TSK_TI_FLAGS]	// get new tasks TI_FLAGS
 | 
			
		||||
	tbnz	x0, #TIF_NEED_RESCHED, 1b	// needs rescheduling?
 | 
			
		||||
	ret	x24
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			@ -674,8 +674,7 @@ ENTRY(cpu_switch_to)
 | 
			
		|||
	ldp	x29, x9, [x8], #16
 | 
			
		||||
	ldr	lr, [x8]
 | 
			
		||||
	mov	sp, x9
 | 
			
		||||
	and	x9, x9, #~(THREAD_SIZE - 1)
 | 
			
		||||
	msr	sp_el0, x9
 | 
			
		||||
	msr	sp_el0, x1
 | 
			
		||||
	ret
 | 
			
		||||
ENDPROC(cpu_switch_to)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -686,7 +685,7 @@ ENDPROC(cpu_switch_to)
 | 
			
		|||
ret_fast_syscall:
 | 
			
		||||
	disable_irq				// disable interrupts
 | 
			
		||||
	str	x0, [sp, #S_X0]			// returned x0
 | 
			
		||||
	ldr	x1, [tsk, #TI_FLAGS]		// re-check for syscall tracing
 | 
			
		||||
	ldr	x1, [tsk, #TSK_TI_FLAGS]	// re-check for syscall tracing
 | 
			
		||||
	and	x2, x1, #_TIF_SYSCALL_WORK
 | 
			
		||||
	cbnz	x2, ret_fast_syscall_trace
 | 
			
		||||
	and	x2, x1, #_TIF_WORK_MASK
 | 
			
		||||
| 
						 | 
				
			
			@ -706,14 +705,14 @@ work_pending:
 | 
			
		|||
#ifdef CONFIG_TRACE_IRQFLAGS
 | 
			
		||||
	bl	trace_hardirqs_on		// enabled while in userspace
 | 
			
		||||
#endif
 | 
			
		||||
	ldr	x1, [tsk, #TI_FLAGS]		// re-check for single-step
 | 
			
		||||
	ldr	x1, [tsk, #TSK_TI_FLAGS]	// re-check for single-step
 | 
			
		||||
	b	finish_ret_to_user
 | 
			
		||||
/*
 | 
			
		||||
 * "slow" syscall return path.
 | 
			
		||||
 */
 | 
			
		||||
ret_to_user:
 | 
			
		||||
	disable_irq				// disable interrupts
 | 
			
		||||
	ldr	x1, [tsk, #TI_FLAGS]
 | 
			
		||||
	ldr	x1, [tsk, #TSK_TI_FLAGS]
 | 
			
		||||
	and	x2, x1, #_TIF_WORK_MASK
 | 
			
		||||
	cbnz	x2, work_pending
 | 
			
		||||
finish_ret_to_user:
 | 
			
		||||
| 
						 | 
				
			
			@ -746,7 +745,7 @@ el0_svc_naked:					// compat entry point
 | 
			
		|||
	enable_dbg_and_irq
 | 
			
		||||
	ct_user_exit 1
 | 
			
		||||
 | 
			
		||||
	ldr	x16, [tsk, #TI_FLAGS]		// check for syscall hooks
 | 
			
		||||
	ldr	x16, [tsk, #TSK_TI_FLAGS]	// check for syscall hooks
 | 
			
		||||
	tst	x16, #_TIF_SYSCALL_WORK
 | 
			
		||||
	b.ne	__sys_trace
 | 
			
		||||
	cmp     scno, sc_nr                     // check upper syscall limit
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -428,7 +428,8 @@ ENDPROC(__create_page_tables)
 | 
			
		|||
__primary_switched:
 | 
			
		||||
	adrp	x4, init_thread_union
 | 
			
		||||
	add	sp, x4, #THREAD_SIZE
 | 
			
		||||
	msr	sp_el0, x4			// Save thread_info
 | 
			
		||||
	adr_l	x5, init_task
 | 
			
		||||
	msr	sp_el0, x5			// Save thread_info
 | 
			
		||||
 | 
			
		||||
	adr_l	x8, vectors			// load VBAR_EL1 with virtual
 | 
			
		||||
	msr	vbar_el1, x8			// vector table address
 | 
			
		||||
| 
						 | 
				
			
			@ -699,10 +700,10 @@ __secondary_switched:
 | 
			
		|||
	isb
 | 
			
		||||
 | 
			
		||||
	adr_l	x0, secondary_data
 | 
			
		||||
	ldr	x0, [x0, #CPU_BOOT_STACK]	// get secondary_data.stack
 | 
			
		||||
	mov	sp, x0
 | 
			
		||||
	and	x0, x0, #~(THREAD_SIZE - 1)
 | 
			
		||||
	msr	sp_el0, x0			// save thread_info
 | 
			
		||||
	ldr	x1, [x0, #CPU_BOOT_STACK]	// get secondary_data.stack
 | 
			
		||||
	mov	sp, x1
 | 
			
		||||
	ldr	x2, [x0, #CPU_BOOT_TASK]
 | 
			
		||||
	msr	sp_el0, x2
 | 
			
		||||
	mov	x29, #0
 | 
			
		||||
	b	secondary_start_kernel
 | 
			
		||||
ENDPROC(__secondary_switched)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -45,6 +45,7 @@
 | 
			
		|||
#include <linux/personality.h>
 | 
			
		||||
#include <linux/notifier.h>
 | 
			
		||||
#include <trace/events/power.h>
 | 
			
		||||
#include <linux/percpu.h>
 | 
			
		||||
 | 
			
		||||
#include <asm/alternative.h>
 | 
			
		||||
#include <asm/compat.h>
 | 
			
		||||
| 
						 | 
				
			
			@ -321,6 +322,20 @@ void uao_thread_switch(struct task_struct *next)
 | 
			
		|||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * We store our current task in sp_el0, which is clobbered by userspace. Keep a
 | 
			
		||||
 * shadow copy so that we can restore this upon entry from userspace.
 | 
			
		||||
 *
 | 
			
		||||
 * This is *only* for exception entry from EL0, and is not valid until we
 | 
			
		||||
 * __switch_to() a user task.
 | 
			
		||||
 */
 | 
			
		||||
DEFINE_PER_CPU(struct task_struct *, __entry_task);
 | 
			
		||||
 | 
			
		||||
static void entry_task_switch(struct task_struct *next)
 | 
			
		||||
{
 | 
			
		||||
	__this_cpu_write(__entry_task, next);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Thread switching.
 | 
			
		||||
 */
 | 
			
		||||
| 
						 | 
				
			
			@ -333,6 +348,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
 | 
			
		|||
	tls_thread_switch(next);
 | 
			
		||||
	hw_breakpoint_thread_switch(next);
 | 
			
		||||
	contextidr_thread_switch(next);
 | 
			
		||||
	entry_task_switch(next);
 | 
			
		||||
	uao_thread_switch(next);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -149,6 +149,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 | 
			
		|||
	 * We need to tell the secondary core where to find its stack and the
 | 
			
		||||
	 * page tables.
 | 
			
		||||
	 */
 | 
			
		||||
	secondary_data.task = idle;
 | 
			
		||||
	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
 | 
			
		||||
	update_cpu_boot_status(CPU_MMU_OFF);
 | 
			
		||||
	__flush_dcache_area(&secondary_data, sizeof(secondary_data));
 | 
			
		||||
| 
						 | 
				
			
			@ -173,6 +174,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 | 
			
		|||
		pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	secondary_data.task = NULL;
 | 
			
		||||
	secondary_data.stack = NULL;
 | 
			
		||||
	status = READ_ONCE(secondary_data.status);
 | 
			
		||||
	if (ret && status) {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue