mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	entry: Provide generic syscall entry functionality
On syscall entry certain work needs to be done: - Establish state (lockdep, context tracking, tracing) - Conditional work (ptrace, seccomp, audit...) This code is needlessly duplicated and different in all architectures. Provide a generic version based on the x86 implementation which has all the RCU and instrumentation bits right. As interrupt/exception entry from user space needs parts of the same functionality, provide a function for this as well. syscall_enter_from_user_mode() and irqentry_enter_from_user_mode() must be called right after the low level ASM entry. The calling code must be non-instrumentable. After the functions return, state is correct and the subsequent functions can be instrumented. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Kees Cook <keescook@chromium.org> Link: https://lkml.kernel.org/r/20200722220519.513463269@linutronix.de
This commit is contained in:
		
							parent
							
								
									6823ecabf0
								
							
						
					
					
						commit
						142781e108
					
				
					 5 changed files with 225 additions and 0 deletions
				
			
		| 
						 | 
					@ -27,6 +27,9 @@ config HAVE_IMA_KEXEC
 | 
				
			||||||
config HOTPLUG_SMT
 | 
					config HOTPLUG_SMT
 | 
				
			||||||
	bool
 | 
						bool
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					config GENERIC_ENTRY
 | 
				
			||||||
 | 
					       bool
 | 
				
			||||||
 | 
					
 | 
				
			||||||
config OPROFILE
 | 
					config OPROFILE
 | 
				
			||||||
	tristate "OProfile system profiling"
 | 
						tristate "OProfile system profiling"
 | 
				
			||||||
	depends on PROFILING
 | 
						depends on PROFILING
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										121
									
								
								include/linux/entry-common.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										121
									
								
								include/linux/entry-common.h
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,121 @@
 | 
				
			||||||
 | 
					/* SPDX-License-Identifier: GPL-2.0 */
 | 
				
			||||||
 | 
					#ifndef __LINUX_ENTRYCOMMON_H
 | 
				
			||||||
 | 
					#define __LINUX_ENTRYCOMMON_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <linux/tracehook.h>
 | 
				
			||||||
 | 
					#include <linux/syscalls.h>
 | 
				
			||||||
 | 
					#include <linux/seccomp.h>
 | 
				
			||||||
 | 
					#include <linux/sched.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <asm/entry-common.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Define dummy _TIF work flags if not defined by the architecture or for
 | 
				
			||||||
 | 
					 * disabled functionality.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#ifndef _TIF_SYSCALL_EMU
 | 
				
			||||||
 | 
					# define _TIF_SYSCALL_EMU		(0)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef _TIF_SYSCALL_TRACEPOINT
 | 
				
			||||||
 | 
					# define _TIF_SYSCALL_TRACEPOINT	(0)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef _TIF_SECCOMP
 | 
				
			||||||
 | 
					# define _TIF_SECCOMP			(0)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef _TIF_SYSCALL_AUDIT
 | 
				
			||||||
 | 
					# define _TIF_SYSCALL_AUDIT		(0)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * TIF flags handled in syscall_enter_from_user_mode()
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#ifndef ARCH_SYSCALL_ENTER_WORK
 | 
				
			||||||
 | 
					# define ARCH_SYSCALL_ENTER_WORK	(0)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define SYSCALL_ENTER_WORK						\
 | 
				
			||||||
 | 
						(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP |	\
 | 
				
			||||||
 | 
						 _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU |			\
 | 
				
			||||||
 | 
						 ARCH_SYSCALL_ENTER_WORK)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
 | 
				
			||||||
 | 
					 * arch_check_user_regs - Architecture specific sanity check for user mode regs
 | 
				
			||||||
 | 
					 * @regs:	Pointer to current's pt_regs
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Defaults to an empty implementation. Can be replaced by architecture
 | 
				
			||||||
 | 
					 * specific code.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Invoked from syscall_enter_from_user_mode() in the non-instrumentable
 | 
				
			||||||
 | 
					 * section. Use __always_inline so the compiler cannot push it out of line
 | 
				
			||||||
 | 
					 * and make it instrumentable.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static __always_inline void arch_check_user_regs(struct pt_regs *regs);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Empty fallback, compiled only when no arch_check_user_regs macro is defined */
					#ifndef arch_check_user_regs
 | 
				
			||||||
 | 
					static __always_inline void arch_check_user_regs(struct pt_regs *regs) {}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
 | 
				
			||||||
 | 
					 * arch_syscall_enter_tracehook - Wrapper around tracehook_report_syscall_entry()
 | 
				
			||||||
 | 
					 * @regs:	Pointer to current's pt_regs
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Returns: 0 on success or an error code to skip the syscall.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Defaults to tracehook_report_syscall_entry(). Can be replaced by
 | 
				
			||||||
 | 
					 * architecture specific code.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Invoked from syscall_enter_from_user_mode()
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Generic fallback, compiled only when no arch_syscall_enter_tracehook macro is defined */
					#ifndef arch_syscall_enter_tracehook
 | 
				
			||||||
 | 
					static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						/* Hand back the result of tracehook_report_syscall_entry() unchanged */
						return tracehook_report_syscall_entry(regs);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
 | 
				
			||||||
 | 
					 * syscall_enter_from_user_mode - Check and handle work before invoking
 | 
				
			||||||
 | 
					 *				 a syscall
 | 
				
			||||||
 | 
					 * @regs:	Pointer to current's pt_regs
 | 
				
			||||||
 | 
					 * @syscall:	The syscall number
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Invoked from architecture specific syscall entry code with interrupts
 | 
				
			||||||
 | 
					 * disabled. The calling code has to be non-instrumentable. When the
 | 
				
			||||||
 | 
					 * function returns all state is correct and the subsequent functions can be
 | 
				
			||||||
 | 
					 * instrumented.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Returns: The original or a modified syscall number
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * If the returned syscall number is -1 then the syscall should be
 | 
				
			||||||
 | 
					 * skipped. In this case the caller may invoke syscall_set_error() or
 | 
				
			||||||
 | 
					 * syscall_set_return_value() first.  If neither of those are called and -1
 | 
				
			||||||
 | 
					 * is returned, then the syscall will fail with ENOSYS.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * The following functionality is handled here:
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *  1) Establish state (lockdep, RCU (context tracking), tracing)
 | 
				
			||||||
 | 
					 *  2) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
 | 
				
			||||||
 | 
					 *     __secure_computing(), trace_sys_enter()
 | 
				
			||||||
 | 
					 *  3) Invocation of audit_syscall_entry()
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
 | 
				
			||||||
 | 
					 * irqentry_enter_from_user_mode - Establish state before invoking the irq handler
 | 
				
			||||||
 | 
					 * @regs:	Pointer to current's pt_regs
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Invoked from architecture specific entry code with interrupts disabled.
 | 
				
			||||||
 | 
					 * Can only be called when the interrupt entry came from user mode. The
 | 
				
			||||||
 | 
					 * calling code must be non-instrumentable.  When the function returns all
 | 
				
			||||||
 | 
					 * state is correct and the subsequent functions can be instrumented.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * The function establishes state (lockdep, RCU (context tracking), tracing)
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					void irqentry_enter_from_user_mode(struct pt_regs *regs);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					@ -48,6 +48,7 @@ obj-y += irq/
 | 
				
			||||||
obj-y += rcu/
 | 
					obj-y += rcu/
 | 
				
			||||||
obj-y += livepatch/
 | 
					obj-y += livepatch/
 | 
				
			||||||
obj-y += dma/
 | 
					obj-y += dma/
 | 
				
			||||||
 | 
					obj-y += entry/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
 | 
					obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
 | 
				
			||||||
obj-$(CONFIG_FREEZER) += freezer.o
 | 
					obj-$(CONFIG_FREEZER) += freezer.o
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										12
									
								
								kernel/entry/Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								kernel/entry/Makefile
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,12 @@
 | 
				
			||||||
 | 
					# SPDX-License-Identifier: GPL-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Prevent the noinstr section from being pestered by sanitizer and other goodies
 | 
				
			||||||
 | 
					# as long as these things cannot be disabled per function.
 | 
				
			||||||
 | 
					KASAN_SANITIZE := n
 | 
				
			||||||
 | 
					UBSAN_SANITIZE := n
 | 
				
			||||||
 | 
					KCOV_INSTRUMENT := n
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CFLAGS_REMOVE_common.o	 = -fstack-protector -fstack-protector-strong
 | 
				
			||||||
 | 
					CFLAGS_common.o		+= -fno-stack-protector
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					obj-$(CONFIG_GENERIC_ENTRY) += common.o
 | 
				
			||||||
							
								
								
									
										88
									
								
								kernel/entry/common.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								kernel/entry/common.c
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,88 @@
 | 
				
			||||||
 | 
					// SPDX-License-Identifier: GPL-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <linux/context_tracking.h>
 | 
				
			||||||
 | 
					#include <linux/entry-common.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define CREATE_TRACE_POINTS
 | 
				
			||||||
 | 
					#include <trace/events/syscalls.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
 | 
				
			||||||
 | 
					 * enter_from_user_mode - Establish state when coming from user mode
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Syscall/interrupt entry disables interrupts, but user mode is traced as
 | 
				
			||||||
 | 
					 * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * 1) Tell lockdep that interrupts are disabled
 | 
				
			||||||
 | 
					 * 2) Invoke context tracking if enabled to reactivate RCU
 | 
				
			||||||
 | 
					 * 3) Trace interrupts off state
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static __always_inline void enter_from_user_mode(struct pt_regs *regs)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						/* Architecture specific sanity check of the user mode registers */
						arch_check_user_regs(regs);
 | 
				
			||||||
 | 
						/* Step 1: tell lockdep that interrupts are disabled */
						lockdep_hardirqs_off(CALLER_ADDR0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Step 2: warn if context tracking does not report user context, then leave it */
						CT_WARN_ON(ct_state() != CONTEXT_USER);
 | 
				
			||||||
 | 
						user_exit_irqoff();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Step 3: trace the interrupts-off state, bracketed as instrumentable code */
						instrumentation_begin();
 | 
				
			||||||
 | 
						trace_hardirqs_off_finish();
 | 
				
			||||||
 | 
						instrumentation_end();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (unlikely(audit_context())) {
 | 
				
			||||||
 | 
							unsigned long args[6];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							syscall_get_arguments(current, regs, args);
 | 
				
			||||||
 | 
							audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static long syscall_trace_enter(struct pt_regs *regs, long syscall,
 | 
				
			||||||
 | 
									unsigned long ti_work)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						long ret = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Handle ptrace */
 | 
				
			||||||
 | 
						if (ti_work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
 | 
				
			||||||
 | 
							ret = arch_syscall_enter_tracehook(regs);
 | 
				
			||||||
 | 
							if (ret || (ti_work & _TIF_SYSCALL_EMU))
 | 
				
			||||||
 | 
								return -1L;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Do seccomp after ptrace, to catch any tracer changes. */
 | 
				
			||||||
 | 
						if (ti_work & _TIF_SECCOMP) {
 | 
				
			||||||
 | 
							ret = __secure_computing(NULL);
 | 
				
			||||||
 | 
							if (ret == -1L)
 | 
				
			||||||
 | 
								return ret;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (unlikely(ti_work & _TIF_SYSCALL_TRACEPOINT))
 | 
				
			||||||
 | 
							trace_sys_enter(regs, syscall);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						syscall_enter_audit(regs, syscall);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return ret ? : syscall;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned long ti_work;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Establish lockdep, context tracking and tracing state before anything else */
						enter_from_user_mode(regs);
 | 
				
			||||||
 | 
						/* Everything up to instrumentation_end() is marked as instrumentable */
						instrumentation_begin();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						local_irq_enable();
 | 
				
			||||||
 | 
						/* Read the thread flags once; the trace path only runs when entry work is pending */
						ti_work = READ_ONCE(current_thread_info()->flags);
 | 
				
			||||||
 | 
						if (ti_work & SYSCALL_ENTER_WORK)
 | 
				
			||||||
 | 
							syscall = syscall_trace_enter(regs, syscall, ti_work);
 | 
				
			||||||
 | 
						instrumentation_end();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Either the number passed in or the result of syscall_trace_enter() */
						return syscall;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						/* Only the common state setup is done here; no syscall work is handled */
						enter_from_user_mode(regs);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Loading…
	
		Reference in a new issue