mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	entry: Provide generic syscall entry functionality
On syscall entry certain work needs to be done: - Establish state (lockdep, context tracking, tracing) - Conditional work (ptrace, seccomp, audit...) This code is needlessly duplicated and different in all architectures. Provide a generic version based on the x86 implementation which has all the RCU and instrumentation bits right. As interrupt/exception entry from user space needs parts of the same functionality, provide a function for this as well. syscall_enter_from_user_mode() and irqentry_enter_from_user_mode() must be called right after the low level ASM entry. The calling code must be non-instrumentable. After the functions returns state is correct and the subsequent functions can be instrumented. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Kees Cook <keescook@chromium.org> Link: https://lkml.kernel.org/r/20200722220519.513463269@linutronix.de
This commit is contained in:
		
							parent
							
								
									6823ecabf0
								
							
						
					
					
						commit
						142781e108
					
				
					 5 changed files with 225 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -27,6 +27,9 @@ config HAVE_IMA_KEXEC
 | 
			
		|||
config HOTPLUG_SMT
 | 
			
		||||
	bool
 | 
			
		||||
 | 
			
		||||
config GENERIC_ENTRY
 | 
			
		||||
       bool
 | 
			
		||||
 | 
			
		||||
config OPROFILE
 | 
			
		||||
	tristate "OProfile system profiling"
 | 
			
		||||
	depends on PROFILING
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										121
									
								
								include/linux/entry-common.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										121
									
								
								include/linux/entry-common.h
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,121 @@
 | 
			
		|||
/* SPDX-License-Identifier: GPL-2.0 */
 | 
			
		||||
#ifndef __LINUX_ENTRYCOMMON_H
 | 
			
		||||
#define __LINUX_ENTRYCOMMON_H
 | 
			
		||||
 | 
			
		||||
#include <linux/tracehook.h>
 | 
			
		||||
#include <linux/syscalls.h>
 | 
			
		||||
#include <linux/seccomp.h>
 | 
			
		||||
#include <linux/sched.h>
 | 
			
		||||
 | 
			
		||||
#include <asm/entry-common.h>
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Define dummy _TIF work flags if not defined by the architecture or for
 | 
			
		||||
 * disabled functionality.
 | 
			
		||||
 */
 | 
			
		||||
#ifndef _TIF_SYSCALL_EMU
 | 
			
		||||
# define _TIF_SYSCALL_EMU		(0)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef _TIF_SYSCALL_TRACEPOINT
 | 
			
		||||
# define _TIF_SYSCALL_TRACEPOINT	(0)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef _TIF_SECCOMP
 | 
			
		||||
# define _TIF_SECCOMP			(0)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef _TIF_SYSCALL_AUDIT
 | 
			
		||||
# define _TIF_SYSCALL_AUDIT		(0)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * TIF flags handled in syscall_enter_from_usermode()
 | 
			
		||||
 */
 | 
			
		||||
#ifndef ARCH_SYSCALL_ENTER_WORK
 | 
			
		||||
# define ARCH_SYSCALL_ENTER_WORK	(0)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define SYSCALL_ENTER_WORK						\
 | 
			
		||||
	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP |	\
 | 
			
		||||
	 _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU |			\
 | 
			
		||||
	 ARCH_SYSCALL_ENTER_WORK)
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * arch_check_user_regs - Architecture specific sanity check for user mode regs
 | 
			
		||||
 * @regs:	Pointer to currents pt_regs
 | 
			
		||||
 *
 | 
			
		||||
 * Defaults to an empty implementation. Can be replaced by architecture
 | 
			
		||||
 * specific code.
 | 
			
		||||
 *
 | 
			
		||||
 * Invoked from syscall_enter_from_user_mode() in the non-instrumentable
 | 
			
		||||
 * section. Use __always_inline so the compiler cannot push it out of line
 | 
			
		||||
 * and make it instrumentable.
 | 
			
		||||
 */
 | 
			
		||||
static __always_inline void arch_check_user_regs(struct pt_regs *regs);
 | 
			
		||||
 | 
			
		||||
#ifndef arch_check_user_regs
 | 
			
		||||
static __always_inline void arch_check_user_regs(struct pt_regs *regs) {}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * arch_syscall_enter_tracehook - Wrapper around tracehook_report_syscall_entry()
 | 
			
		||||
 * @regs:	Pointer to currents pt_regs
 | 
			
		||||
 *
 | 
			
		||||
 * Returns: 0 on success or an error code to skip the syscall.
 | 
			
		||||
 *
 | 
			
		||||
 * Defaults to tracehook_report_syscall_entry(). Can be replaced by
 | 
			
		||||
 * architecture specific code.
 | 
			
		||||
 *
 | 
			
		||||
 * Invoked from syscall_enter_from_user_mode()
 | 
			
		||||
 */
 | 
			
		||||
static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs);
 | 
			
		||||
 | 
			
		||||
#ifndef arch_syscall_enter_tracehook
 | 
			
		||||
static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs)
 | 
			
		||||
{
 | 
			
		||||
	return tracehook_report_syscall_entry(regs);
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * syscall_enter_from_user_mode - Check and handle work before invoking
 | 
			
		||||
 *				 a syscall
 | 
			
		||||
 * @regs:	Pointer to currents pt_regs
 | 
			
		||||
 * @syscall:	The syscall number
 | 
			
		||||
 *
 | 
			
		||||
 * Invoked from architecture specific syscall entry code with interrupts
 | 
			
		||||
 * disabled. The calling code has to be non-instrumentable. When the
 | 
			
		||||
 * function returns all state is correct and the subsequent functions can be
 | 
			
		||||
 * instrumented.
 | 
			
		||||
 *
 | 
			
		||||
 * Returns: The original or a modified syscall number
 | 
			
		||||
 *
 | 
			
		||||
 * If the returned syscall number is -1 then the syscall should be
 | 
			
		||||
 * skipped. In this case the caller may invoke syscall_set_error() or
 | 
			
		||||
 * syscall_set_return_value() first.  If neither of those are called and -1
 | 
			
		||||
 * is returned, then the syscall will fail with ENOSYS.
 | 
			
		||||
 *
 | 
			
		||||
 * The following functionality is handled here:
 | 
			
		||||
 *
 | 
			
		||||
 *  1) Establish state (lockdep, RCU (context tracking), tracing)
 | 
			
		||||
 *  2) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
 | 
			
		||||
 *     __secure_computing(), trace_sys_enter()
 | 
			
		||||
 *  3) Invocation of audit_syscall_entry()
 | 
			
		||||
 */
 | 
			
		||||
long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * irqentry_enter_from_user_mode - Establish state before invoking the irq handler
 | 
			
		||||
 * @regs:	Pointer to currents pt_regs
 | 
			
		||||
 *
 | 
			
		||||
 * Invoked from architecture specific entry code with interrupts disabled.
 | 
			
		||||
 * Can only be called when the interrupt entry came from user mode. The
 | 
			
		||||
 * calling code must be non-instrumentable.  When the function returns all
 | 
			
		||||
 * state is correct and the subsequent functions can be instrumented.
 | 
			
		||||
 *
 | 
			
		||||
 * The function establishes state (lockdep, RCU (context tracking), tracing)
 | 
			
		||||
 */
 | 
			
		||||
void irqentry_enter_from_user_mode(struct pt_regs *regs);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			@ -48,6 +48,7 @@ obj-y += irq/
 | 
			
		|||
obj-y += rcu/
 | 
			
		||||
obj-y += livepatch/
 | 
			
		||||
obj-y += dma/
 | 
			
		||||
obj-y += entry/
 | 
			
		||||
 | 
			
		||||
obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
 | 
			
		||||
obj-$(CONFIG_FREEZER) += freezer.o
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										12
									
								
								kernel/entry/Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								kernel/entry/Makefile
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,12 @@
 | 
			
		|||
# SPDX-License-Identifier: GPL-2.0
 | 
			
		||||
 | 
			
		||||
# Prevent the noinstr section from being pestered by sanitizer and other goodies
 | 
			
		||||
# as long as these things cannot be disabled per function.
 | 
			
		||||
KASAN_SANITIZE := n
 | 
			
		||||
UBSAN_SANITIZE := n
 | 
			
		||||
KCOV_INSTRUMENT := n
 | 
			
		||||
 | 
			
		||||
CFLAGS_REMOVE_common.o	 = -fstack-protector -fstack-protector-strong
 | 
			
		||||
CFLAGS_common.o		+= -fno-stack-protector
 | 
			
		||||
 | 
			
		||||
obj-$(CONFIG_GENERIC_ENTRY) += common.o
 | 
			
		||||
							
								
								
									
										88
									
								
								kernel/entry/common.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								kernel/entry/common.c
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,88 @@
 | 
			
		|||
// SPDX-License-Identifier: GPL-2.0
 | 
			
		||||
 | 
			
		||||
#include <linux/context_tracking.h>
 | 
			
		||||
#include <linux/entry-common.h>
 | 
			
		||||
 | 
			
		||||
#define CREATE_TRACE_POINTS
 | 
			
		||||
#include <trace/events/syscalls.h>
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * enter_from_user_mode - Establish state when coming from user mode
 | 
			
		||||
 *
 | 
			
		||||
 * Syscall/interrupt entry disables interrupts, but user mode is traced as
 | 
			
		||||
 * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
 | 
			
		||||
 *
 | 
			
		||||
 * 1) Tell lockdep that interrupts are disabled
 | 
			
		||||
 * 2) Invoke context tracking if enabled to reactivate RCU
 | 
			
		||||
 * 3) Trace interrupts off state
 | 
			
		||||
 */
 | 
			
		||||
static __always_inline void enter_from_user_mode(struct pt_regs *regs)
 | 
			
		||||
{
 | 
			
		||||
	arch_check_user_regs(regs);
 | 
			
		||||
	lockdep_hardirqs_off(CALLER_ADDR0);
 | 
			
		||||
 | 
			
		||||
	CT_WARN_ON(ct_state() != CONTEXT_USER);
 | 
			
		||||
	user_exit_irqoff();
 | 
			
		||||
 | 
			
		||||
	instrumentation_begin();
 | 
			
		||||
	trace_hardirqs_off_finish();
 | 
			
		||||
	instrumentation_end();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
 | 
			
		||||
{
 | 
			
		||||
	if (unlikely(audit_context())) {
 | 
			
		||||
		unsigned long args[6];
 | 
			
		||||
 | 
			
		||||
		syscall_get_arguments(current, regs, args);
 | 
			
		||||
		audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static long syscall_trace_enter(struct pt_regs *regs, long syscall,
 | 
			
		||||
				unsigned long ti_work)
 | 
			
		||||
{
 | 
			
		||||
	long ret = 0;
 | 
			
		||||
 | 
			
		||||
	/* Handle ptrace */
 | 
			
		||||
	if (ti_work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
 | 
			
		||||
		ret = arch_syscall_enter_tracehook(regs);
 | 
			
		||||
		if (ret || (ti_work & _TIF_SYSCALL_EMU))
 | 
			
		||||
			return -1L;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* Do seccomp after ptrace, to catch any tracer changes. */
 | 
			
		||||
	if (ti_work & _TIF_SECCOMP) {
 | 
			
		||||
		ret = __secure_computing(NULL);
 | 
			
		||||
		if (ret == -1L)
 | 
			
		||||
			return ret;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (unlikely(ti_work & _TIF_SYSCALL_TRACEPOINT))
 | 
			
		||||
		trace_sys_enter(regs, syscall);
 | 
			
		||||
 | 
			
		||||
	syscall_enter_audit(regs, syscall);
 | 
			
		||||
 | 
			
		||||
	return ret ? : syscall;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
 | 
			
		||||
{
 | 
			
		||||
	unsigned long ti_work;
 | 
			
		||||
 | 
			
		||||
	enter_from_user_mode(regs);
 | 
			
		||||
	instrumentation_begin();
 | 
			
		||||
 | 
			
		||||
	local_irq_enable();
 | 
			
		||||
	ti_work = READ_ONCE(current_thread_info()->flags);
 | 
			
		||||
	if (ti_work & SYSCALL_ENTER_WORK)
 | 
			
		||||
		syscall = syscall_trace_enter(regs, syscall, ti_work);
 | 
			
		||||
	instrumentation_end();
 | 
			
		||||
 | 
			
		||||
	return syscall;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
 | 
			
		||||
{
 | 
			
		||||
	enter_from_user_mode(regs);
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
		Reference in a new issue