Mirror of https://github.com/torvalds/linux.git
	x86/entry: Use generic syscall entry function
Replace the syscall entry work handling with the generic version. Provide
the necessary helper inlines to handle the real architecture specific
parts, e.g. ptrace.

Use a temporary define for idtentry_enter_user which will be cleaned up
separately.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lkml.kernel.org/r/20200722220520.376213694@linutronix.de
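For orientation: the work-flag handling deleted from arch/x86/entry/common.c below now lives in the generic entry code (kernel/entry/common.c), which is not part of this diff. The sketch below approximates how that generic path is expected to use the hooks this patch wires up; it is pieced together from the removed x86 code, not copied from the generic implementation, and SYSCALL_ENTRY_WORK and do_syscall_entry_work() are placeholder names, not real kernel symbols.

	/*
	 * Illustrative sketch only: approximates the generic syscall entry
	 * path that replaces the removed x86 syscall_enter() and
	 * syscall_trace_enter() below.
	 */
	static __always_inline void generic_enter_from_user_mode(struct pt_regs *regs)
	{
		arch_check_user_regs(regs);		/* arch hook added by this patch */
		lockdep_hardirqs_off(CALLER_ADDR0);	/* interrupts are still disabled here */
		user_exit_irqoff();			/* context tracking: leave user/RCU-idle state */

		instrumentation_begin();
		trace_hardirqs_off_finish();
		instrumentation_end();
	}

	noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
	{
		unsigned long work;

		generic_enter_from_user_mode(regs);

		instrumentation_begin();
		local_irq_enable();

		work = READ_ONCE(current_thread_info()->flags);
		if (work & SYSCALL_ENTRY_WORK) {
			/*
			 * Placeholder for the generic equivalent of the removed
			 * _TIF_WORK_SYSCALL_ENTRY handling: ptrace reporting,
			 * seccomp, audit and the sys_enter tracepoint.
			 */
			syscall = do_syscall_entry_work(regs, syscall, work);
		}
		instrumentation_end();

		return syscall;
	}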
This commit is contained in:
  parent 0bf019ea59
  commit 27d6b4d14f

5 changed files with 45 additions and 179 deletions
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -115,6 +115,7 @@ config X86
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_VULNERABILITIES
 	select GENERIC_EARLY_IOREMAP
+	select GENERIC_ENTRY
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_IOMAP
 	select GENERIC_IRQ_EFFECTIVE_AFF_MASK	if SMP
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -10,13 +10,13 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
+#include <linux/entry-common.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/tracehook.h>
 #include <linux/audit.h>
-#include <linux/seccomp.h>
 #include <linux/signal.h>
 #include <linux/export.h>
 #include <linux/context_tracking.h>
@@ -42,70 +42,8 @@
 #include <asm/syscall.h>
 #include <asm/irq_stack.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
-
-/* Check that the stack and regs on entry from user mode are sane. */
-static noinstr void check_user_regs(struct pt_regs *regs)
-{
-	if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
-		/*
-		 * Make sure that the entry code gave us a sensible EFLAGS
-		 * register.  Native because we want to check the actual CPU
-		 * state, not the interrupt state as imagined by Xen.
-		 */
-		unsigned long flags = native_save_fl();
-		WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
-				      X86_EFLAGS_NT));
-
-		/* We think we came from user mode. Make sure pt_regs agrees. */
-		WARN_ON_ONCE(!user_mode(regs));
-
-		/*
-		 * All entries from user mode (except #DF) should be on the
-		 * normal thread stack and should have user pt_regs in the
-		 * correct location.
-		 */
-		WARN_ON_ONCE(!on_thread_stack());
-		WARN_ON_ONCE(regs != task_pt_regs(current));
-	}
-}
-
-#ifdef CONFIG_CONTEXT_TRACKING
-/**
- * enter_from_user_mode - Establish state when coming from user mode
- *
- * Syscall entry disables interrupts, but user mode is traced as interrupts
- * enabled. Also with NO_HZ_FULL RCU might be idle.
- *
- * 1) Tell lockdep that interrupts are disabled
- * 2) Invoke context tracking if enabled to reactivate RCU
- * 3) Trace interrupts off state
- */
-static noinstr void enter_from_user_mode(struct pt_regs *regs)
-{
-	enum ctx_state state = ct_state();
-
-	check_user_regs(regs);
-	lockdep_hardirqs_off(CALLER_ADDR0);
-	user_exit_irqoff();
-
-	instrumentation_begin();
-	CT_WARN_ON(state != CONTEXT_USER);
-	trace_hardirqs_off_finish();
-	instrumentation_end();
-}
-#else
-static __always_inline void enter_from_user_mode(struct pt_regs *regs)
-{
-	check_user_regs(regs);
-	lockdep_hardirqs_off(CALLER_ADDR0);
-	instrumentation_begin();
-	trace_hardirqs_off_finish();
-	instrumentation_end();
-}
-#endif
-
 /**
  * exit_to_user_mode - Fixup state when exiting to user mode
  *
@@ -129,83 +67,6 @@ static __always_inline void exit_to_user_mode(void)
 	lockdep_hardirqs_on(CALLER_ADDR0);
 }
 
-static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
-{
-#ifdef CONFIG_X86_64
-	if (arch == AUDIT_ARCH_X86_64) {
-		audit_syscall_entry(regs->orig_ax, regs->di,
-				    regs->si, regs->dx, regs->r10);
-	} else
-#endif
-	{
-		audit_syscall_entry(regs->orig_ax, regs->bx,
-				    regs->cx, regs->dx, regs->si);
-	}
-}
-
-/*
- * Returns the syscall nr to run (which should match regs->orig_ax) or -1
- * to skip the syscall.
- */
-static long syscall_trace_enter(struct pt_regs *regs)
-{
-	u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
-
-	struct thread_info *ti = current_thread_info();
-	unsigned long ret = 0;
-	u32 work;
-
-	work = READ_ONCE(ti->flags);
-
-	if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
-		ret = tracehook_report_syscall_entry(regs);
-		if (ret || (work & _TIF_SYSCALL_EMU))
-			return -1L;
-	}
-
-#ifdef CONFIG_SECCOMP
-	/*
-	 * Do seccomp after ptrace, to catch any tracer changes.
-	 */
-	if (work & _TIF_SECCOMP) {
-		struct seccomp_data sd;
-
-		sd.arch = arch;
-		sd.nr = regs->orig_ax;
-		sd.instruction_pointer = regs->ip;
-#ifdef CONFIG_X86_64
-		if (arch == AUDIT_ARCH_X86_64) {
-			sd.args[0] = regs->di;
-			sd.args[1] = regs->si;
-			sd.args[2] = regs->dx;
-			sd.args[3] = regs->r10;
-			sd.args[4] = regs->r8;
-			sd.args[5] = regs->r9;
-		} else
-#endif
-		{
-			sd.args[0] = regs->bx;
-			sd.args[1] = regs->cx;
-			sd.args[2] = regs->dx;
-			sd.args[3] = regs->si;
-			sd.args[4] = regs->di;
-			sd.args[5] = regs->bp;
-		}
-
-		ret = __secure_computing(&sd);
-		if (ret == -1)
-			return ret;
-	}
-#endif
-
-	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
-		trace_sys_enter(regs, regs->orig_ax);
-
-	do_audit_syscall_entry(regs, arch);
-
-	return ret ?: regs->orig_ax;
-}
-
 #define EXIT_TO_USERMODE_LOOP_FLAGS				\
 	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |	\
 	 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING)
@@ -366,26 +227,10 @@ __visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
 	exit_to_user_mode();
 }
 
-static noinstr long syscall_enter(struct pt_regs *regs, unsigned long nr)
-{
-	struct thread_info *ti;
-
-	enter_from_user_mode(regs);
-	instrumentation_begin();
-
-	local_irq_enable();
-	ti = current_thread_info();
-	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
-		nr = syscall_trace_enter(regs);
-
-	instrumentation_end();
-	return nr;
-}
-
 #ifdef CONFIG_X86_64
 __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 {
-	nr = syscall_enter(regs, nr);
+	nr = syscall_enter_from_user_mode(regs, nr);
 
 	instrumentation_begin();
 	if (likely(nr < NR_syscalls)) {
@@ -407,6 +252,8 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
 {
+	unsigned int nr = (unsigned int)regs->orig_ax;
+
 	if (IS_ENABLED(CONFIG_IA32_EMULATION))
 		current_thread_info()->status |= TS_COMPAT;
 	/*
@@ -414,7 +261,7 @@ static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
 	 * orig_ax, the unsigned int return value truncates it.  This may
 	 * or may not be necessary, but it matches the old asm behavior.
 	 */
-	return syscall_enter(regs, (unsigned int)regs->orig_ax);
+	return (unsigned int)syscall_enter_from_user_mode(regs, nr);
 }
 
 /*
@@ -568,7 +415,7 @@ SYSCALL_DEFINE0(ni_syscall)
  * solves the problem of kernel mode pagefaults which can schedule, which
  * is not possible after invoking rcu_irq_enter() without undoing it.
  *
- * For user mode entries enter_from_user_mode() must be invoked to
+ * For user mode entries irqentry_enter_from_user_mode() must be invoked to
  * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
  * would not be possible.
  *
@@ -584,7 +431,7 @@ idtentry_state_t noinstr idtentry_enter(struct pt_regs *regs)
 	};
 
 	if (user_mode(regs)) {
-		enter_from_user_mode(regs);
+		irqentry_enter_from_user_mode(regs);
 		return ret;
 	}
 
@@ -615,7 +462,7 @@ idtentry_state_t noinstr idtentry_enter(struct pt_regs *regs)
 		/*
 		 * If RCU is not watching then the same careful
 		 * sequence vs. lockdep and tracing is required
-		 * as in enter_from_user_mode().
+		 * as in irqentry_enter_from_user_mode().
 		 */
 		lockdep_hardirqs_off(CALLER_ADDR0);
 		rcu_irq_enter();
@@ -708,18 +555,6 @@ void noinstr idtentry_exit(struct pt_regs *regs, idtentry_state_t state)
 	}
 }
 
-/**
- * idtentry_enter_user - Handle state tracking on idtentry from user mode
- * @regs:	Pointer to pt_regs of interrupted context
- *
- * Invokes enter_from_user_mode() to establish the proper context for
- * NOHZ_FULL. Otherwise scheduling on exit would not be possible.
- */
-void noinstr idtentry_enter_user(struct pt_regs *regs)
-{
-	enter_from_user_mode(regs);
-}
-
 /**
  * idtentry_exit_user - Handle return from exception to user mode
  * @regs:	Pointer to pt_regs (exception entry regs)
--- /dev/null
+++ b/arch/x86/include/asm/entry-common.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_X86_ENTRY_COMMON_H
+#define _ASM_X86_ENTRY_COMMON_H
+
+/* Check that the stack and regs on entry from user mode are sane. */
+static __always_inline void arch_check_user_regs(struct pt_regs *regs)
+{
+	if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
+		/*
+		 * Make sure that the entry code gave us a sensible EFLAGS
+		 * register.  Native because we want to check the actual CPU
+		 * state, not the interrupt state as imagined by Xen.
+		 */
+		unsigned long flags = native_save_fl();
+		WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
+				      X86_EFLAGS_NT));
+
+		/* We think we came from user mode. Make sure pt_regs agrees. */
+		WARN_ON_ONCE(!user_mode(regs));
+
+		/*
+		 * All entries from user mode (except #DF) should be on the
+		 * normal thread stack and should have user pt_regs in the
+		 * correct location.
+		 */
+		WARN_ON_ONCE(!on_thread_stack());
+		WARN_ON_ONCE(regs != task_pt_regs(current));
+	}
+}
+#define arch_check_user_regs arch_check_user_regs
+
+#endif
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -6,11 +6,14 @@
 #include <asm/trapnr.h>
 
 #ifndef __ASSEMBLY__
+#include <linux/entry-common.h>
 #include <linux/hardirq.h>
 
 #include <asm/irq_stack.h>
 
-void idtentry_enter_user(struct pt_regs *regs);
+/* Temporary define */
+#define idtentry_enter_user	irqentry_enter_from_user_mode
+
 void idtentry_exit_user(struct pt_regs *regs);
 
 typedef struct idtentry_state {
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -133,11 +133,6 @@ struct thread_info {
 #define _TIF_X32		(1 << TIF_X32)
 #define _TIF_FSCHECK		(1 << TIF_FSCHECK)
 
-/* Work to do before invoking the actual syscall. */
-#define _TIF_WORK_SYSCALL_ENTRY	\
-	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT |	\
-	 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
-
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW_BASE					\
 	(_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP |		\
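A note on the new asm/entry-common.h: the trailing "#define arch_check_user_regs arch_check_user_regs" is what tells the generic entry header that x86 overrides the hook. The generic <linux/entry-common.h> is expected to provide an empty fallback guarded by that macro, along the lines of the sketch below (illustrative of the convention, not a verbatim copy of the generic header):

	/*
	 * Assumed fallback stub in <linux/entry-common.h> for architectures
	 * that do not implement the hook; defining the macro in
	 * asm/entry-common.h, as this patch does for x86, makes the
	 * architecture's version take precedence.
	 */
	#ifndef arch_check_user_regs
	static __always_inline void arch_check_user_regs(struct pt_regs *regs) {}
	#endif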