forked from mirrors/linux
		
	kbuild: add support for Clang LTO
This change adds build system support for Clang's Link Time
Optimization (LTO). With -flto, instead of ELF object files, Clang
produces LLVM bitcode, which is compiled into native code at link
time, allowing the final binary to be optimized globally. For more
details, see:

  https://llvm.org/docs/LinkTimeOptimization.html

The Kconfig option CONFIG_LTO_CLANG is implemented as a choice,
which defaults to LTO being disabled. To use LTO, the architecture
must select ARCH_SUPPORTS_LTO_CLANG and support:

  - compiling with Clang,
  - compiling all assembly code with Clang's integrated assembler,
  - and linking with LLD.

While using CONFIG_LTO_CLANG_FULL results in the best runtime
performance, the compilation is not scalable in time or memory.
CONFIG_LTO_CLANG_THIN enables ThinLTO, which allows parallel
optimization and faster incremental builds. ThinLTO is used by
default if the architecture also selects
ARCH_SUPPORTS_LTO_CLANG_THIN:

  https://clang.llvm.org/docs/ThinLTO.html

To enable LTO, LLVM tools must be used to handle bitcode files, by
passing LLVM=1 and LLVM_IAS=1 options to make:

  $ make LLVM=1 LLVM_IAS=1 defconfig
  $ scripts/config -e LTO_CLANG_THIN
  $ make LLVM=1 LLVM_IAS=1

To prepare for LTO support with other compilers, common parts are
gated behind the CONFIG_LTO option, and LTO can be disabled for
specific files by filtering out CC_FLAGS_LTO.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20201211184633.3213045-3-samitolvanen@google.com
This commit is contained in:
		
							parent
							
								
									3b15cdc159
								
							
						
					
					
						commit
						dc5723b02e
					
				
					 7 changed files with 174 additions and 18 deletions
				
			
		
							
								
								
									
										19
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										19
									
								
								Makefile
									
									
									
									
									
								
							|  | @ -893,6 +893,21 @@ KBUILD_CFLAGS	+= $(CC_FLAGS_SCS) | |||
| export CC_FLAGS_SCS | ||||
| endif | ||||
| 
 | ||||
| ifdef CONFIG_LTO_CLANG | ||||
| ifdef CONFIG_LTO_CLANG_THIN | ||||
| CC_FLAGS_LTO	+= -flto=thin -fsplit-lto-unit | ||||
| KBUILD_LDFLAGS	+= --thinlto-cache-dir=$(extmod-prefix).thinlto-cache | ||||
| else | ||||
| CC_FLAGS_LTO	+= -flto | ||||
| endif | ||||
| CC_FLAGS_LTO	+= -fvisibility=hidden | ||||
| endif | ||||
| 
 | ||||
| ifdef CONFIG_LTO | ||||
| KBUILD_CFLAGS	+= $(CC_FLAGS_LTO) | ||||
| export CC_FLAGS_LTO | ||||
| endif | ||||
| 
 | ||||
| ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B | ||||
| KBUILD_CFLAGS += -falign-functions=32 | ||||
| endif | ||||
|  | @ -1479,7 +1494,7 @@ MRPROPER_FILES += include/config include/generated          \ | |||
| 		  *.spec | ||||
| 
 | ||||
| # Directories & files removed with 'make distclean'
 | ||||
| DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS | ||||
| DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS .thinlto-cache | ||||
| 
 | ||||
| # clean - Delete most, but leave enough to build external modules
 | ||||
| #
 | ||||
|  | @ -1725,7 +1740,7 @@ PHONY += compile_commands.json | |||
| 
 | ||||
| clean-dirs := $(KBUILD_EXTMOD) | ||||
| clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers $(KBUILD_EXTMOD)/modules.nsdeps \ | ||||
| 	$(KBUILD_EXTMOD)/compile_commands.json | ||||
| 	$(KBUILD_EXTMOD)/compile_commands.json $(KBUILD_EXTMOD)/.thinlto-cache | ||||
| 
 | ||||
| PHONY += help | ||||
| help: | ||||
|  |  | |||
							
								
								
									
										91
									
								
								arch/Kconfig
									
									
									
									
									
								
							
							
						
						
									
										91
									
								
								arch/Kconfig
									
									
									
									
									
								
							|  | @ -631,6 +631,97 @@ config SHADOW_CALL_STACK | |||
| 	  reading and writing arbitrary memory may be able to locate them | ||||
| 	  and hijack control flow by modifying the stacks. | ||||
| 
 | ||||
| config LTO | ||||
| 	bool | ||||
| 	help | ||||
| 	  Selected if the kernel will be built using the compiler's LTO feature. | ||||
| 
 | ||||
| config LTO_CLANG | ||||
| 	bool | ||||
| 	select LTO | ||||
| 	help | ||||
| 	  Selected if the kernel will be built using Clang's LTO feature. | ||||
| 
 | ||||
| config ARCH_SUPPORTS_LTO_CLANG | ||||
| 	bool | ||||
| 	help | ||||
| 	  An architecture should select this option if it supports: | ||||
| 	  - compiling with Clang, | ||||
| 	  - compiling inline assembly with Clang's integrated assembler, | ||||
| 	  - and linking with LLD. | ||||
| 
 | ||||
| config ARCH_SUPPORTS_LTO_CLANG_THIN | ||||
| 	bool | ||||
| 	help | ||||
| 	  An architecture should select this option if it can support Clang's | ||||
| 	  ThinLTO mode. | ||||
| 
 | ||||
| config HAS_LTO_CLANG | ||||
| 	def_bool y | ||||
| 	# Clang >= 11: https://github.com/ClangBuiltLinux/linux/issues/510 | ||||
| 	depends on CC_IS_CLANG && CLANG_VERSION >= 110000 && LD_IS_LLD | ||||
| 	depends on $(success,test $(LLVM) -eq 1) | ||||
| 	depends on $(success,test $(LLVM_IAS) -eq 1) | ||||
| 	depends on $(success,$(NM) --help | head -n 1 | grep -qi llvm) | ||||
| 	depends on $(success,$(AR) --help | head -n 1 | grep -qi llvm) | ||||
| 	depends on ARCH_SUPPORTS_LTO_CLANG | ||||
| 	depends on !FTRACE_MCOUNT_USE_RECORDMCOUNT | ||||
| 	depends on !KASAN | ||||
| 	depends on !GCOV_KERNEL | ||||
| 	depends on !MODVERSIONS | ||||
| 	help | ||||
| 	  The compiler and Kconfig options support building with Clang's | ||||
| 	  LTO. | ||||
| 
 | ||||
| choice | ||||
| 	prompt "Link Time Optimization (LTO)" | ||||
| 	default LTO_NONE | ||||
| 	help | ||||
| 	  This option enables Link Time Optimization (LTO), which allows the | ||||
| 	  compiler to optimize binaries globally. | ||||
| 
 | ||||
| 	  If unsure, select LTO_NONE. Note that LTO is very resource-intensive | ||||
| 	  so it's disabled by default. | ||||
| 
 | ||||
| config LTO_NONE | ||||
| 	bool "None" | ||||
| 	help | ||||
| 	  Build the kernel normally, without Link Time Optimization (LTO). | ||||
| 
 | ||||
| config LTO_CLANG_FULL | ||||
| 	bool "Clang Full LTO (EXPERIMENTAL)" | ||||
| 	depends on HAS_LTO_CLANG | ||||
| 	depends on !COMPILE_TEST | ||||
| 	select LTO_CLANG | ||||
| 	help | ||||
|           This option enables Clang's full Link Time Optimization (LTO), which | ||||
|           allows the compiler to optimize the kernel globally. If you enable | ||||
|           this option, the compiler generates LLVM bitcode instead of ELF | ||||
|           object files, and the actual compilation from bitcode happens at | ||||
|           the LTO link step, which may take several minutes depending on the | ||||
|           kernel configuration. More information can be found from LLVM's | ||||
|           documentation: | ||||
| 
 | ||||
| 	    https://llvm.org/docs/LinkTimeOptimization.html | ||||
| 
 | ||||
| 	  During link time, this option can use a large amount of RAM, and | ||||
| 	  may take much longer than the ThinLTO option. | ||||
| 
 | ||||
| config LTO_CLANG_THIN | ||||
| 	bool "Clang ThinLTO (EXPERIMENTAL)" | ||||
| 	depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN | ||||
| 	select LTO_CLANG | ||||
| 	help | ||||
| 	  This option enables Clang's ThinLTO, which allows for parallel | ||||
| 	  optimization and faster incremental compiles compared to the | ||||
| 	  CONFIG_LTO_CLANG_FULL option. More information can be found | ||||
| 	  from Clang's documentation: | ||||
| 
 | ||||
| 	    https://clang.llvm.org/docs/ThinLTO.html | ||||
| 
 | ||||
| 	  If unsure, say Y. | ||||
| endchoice | ||||
| 
 | ||||
| config HAVE_ARCH_WITHIN_STACK_FRAMES | ||||
| 	bool | ||||
| 	help | ||||
|  |  | |||
|  | @ -90,15 +90,18 @@ | |||
|  * .data. We don't want to pull in .data..other sections, which Linux | ||||
|  * has defined. Same for text and bss. | ||||
|  * | ||||
|  * With LTO_CLANG, the linker also splits sections by default, so we need | ||||
|  * these macros to combine the sections during the final link. | ||||
|  * | ||||
|  * RODATA_MAIN is not used because existing code already defines .rodata.x | ||||
|  * sections to be brought in with rodata. | ||||
|  */ | ||||
| #ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION | ||||
| #if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) | ||||
| #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* | ||||
| #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX* | ||||
| #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* | ||||
| #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* | ||||
| #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* | ||||
| #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* | ||||
| #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* | ||||
| #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..compoundliteral* | ||||
| #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* | ||||
| #else | ||||
| #define TEXT_MAIN .text | ||||
|  |  | |||
|  | @ -111,7 +111,7 @@ endif | |||
| # --------------------------------------------------------------------------- | ||||
| 
 | ||||
| quiet_cmd_cc_s_c = CC $(quiet_modtag)  $@ | ||||
|       cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS), $(c_flags)) -fverbose-asm -S -o $@ $< | ||||
|       cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS) $(CC_FLAGS_LTO), $(c_flags)) -fverbose-asm -S -o $@ $< | ||||
| 
 | ||||
| $(obj)/%.s: $(src)/%.c FORCE | ||||
| 	$(call if_changed_dep,cc_s_c) | ||||
|  | @ -421,8 +421,15 @@ $(obj)/lib.a: $(lib-y) FORCE | |||
| # Do not replace $(filter %.o,^) with $(real-prereqs). When a single object | ||||
| # module is turned into a multi object module, $^ will contain header file | ||||
| # dependencies recorded in the .*.cmd file. | ||||
| ifdef CONFIG_LTO_CLANG | ||||
| quiet_cmd_link_multi-m = AR [M]  $@ | ||||
| cmd_link_multi-m =						\ | ||||
| 	rm -f $@; 						\ | ||||
| 	$(AR) cDPrsT $@ $(filter %.o,$^) | ||||
| else | ||||
| quiet_cmd_link_multi-m = LD [M]  $@ | ||||
|       cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^) | ||||
| endif | ||||
| 
 | ||||
| $(multi-used-m): FORCE | ||||
| 	$(call if_changed,link_multi-m) | ||||
|  |  | |||
|  | @ -30,6 +30,12 @@ quiet_cmd_cc_o_c = CC [M]  $@ | |||
| 
 | ||||
| ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) | ||||
| 
 | ||||
| ifdef CONFIG_LTO_CLANG | ||||
| # With CONFIG_LTO_CLANG, reuse the object file we compiled for modpost to | ||||
| # avoid a second slow LTO link | ||||
| prelink-ext := .lto | ||||
| endif | ||||
| 
 | ||||
| quiet_cmd_ld_ko_o = LD [M]  $@ | ||||
|       cmd_ld_ko_o =                                                     \ | ||||
| 	$(LD) -r $(KBUILD_LDFLAGS)					\ | ||||
|  | @ -53,8 +59,9 @@ if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check),      \ | |||
| 	$(cmd);                                                              \ | ||||
| 	printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:) | ||||
| 
 | ||||
| 
 | ||||
| # Re-generate module BTFs if either module's .ko or vmlinux changed | ||||
| $(modules): %.ko: %.o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE | ||||
| $(modules): %.ko: %$(prelink-ext).o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE | ||||
| 	+$(call if_changed_except,ld_ko_o,vmlinux) | ||||
| ifdef CONFIG_DEBUG_INFO_BTF_MODULES | ||||
| 	+$(if $(newer-prereqs),$(call cmd,btf_ko)) | ||||
|  |  | |||
|  | @ -43,6 +43,9 @@ __modpost: | |||
| include include/config/auto.conf | ||||
| include scripts/Kbuild.include | ||||
| 
 | ||||
| # for ld_flags | ||||
| include scripts/Makefile.lib | ||||
| 
 | ||||
| MODPOST = scripts/mod/modpost								\ | ||||
| 	$(if $(CONFIG_MODVERSIONS),-m)							\ | ||||
| 	$(if $(CONFIG_MODULE_SRCVERSION_ALL),-a)					\ | ||||
|  | @ -102,12 +105,26 @@ $(input-symdump): | |||
| 	@echo >&2 'WARNING: Symbol version dump "$@" is missing.' | ||||
| 	@echo >&2 '         Modules may not have dependencies or modversions.' | ||||
| 
 | ||||
| ifdef CONFIG_LTO_CLANG | ||||
| # With CONFIG_LTO_CLANG, .o files might be LLVM bitcode, so we need to run | ||||
| # LTO to compile them into native code before running modpost | ||||
| prelink-ext := .lto | ||||
| 
 | ||||
| quiet_cmd_cc_lto_link_modules = LTO [M] $@ | ||||
| cmd_cc_lto_link_modules = $(LD) $(ld_flags) -r -o $@ --whole-archive $^ | ||||
| 
 | ||||
| %.lto.o: %.o | ||||
| 	$(call if_changed,cc_lto_link_modules) | ||||
| endif | ||||
| 
 | ||||
| modules := $(sort $(shell cat $(MODORDER))) | ||||
| 
 | ||||
| # Read out modules.order to pass in modpost. | ||||
| # Otherwise, allmodconfig would fail with "Argument list too long". | ||||
| quiet_cmd_modpost = MODPOST $@ | ||||
|       cmd_modpost = sed 's/ko$$/o/' $< | $(MODPOST) -T - | ||||
|       cmd_modpost = sed 's/\.ko$$/$(prelink-ext)\.o/' $< | $(MODPOST) -T - | ||||
| 
 | ||||
| $(output-symdump): $(MODORDER) $(input-symdump) FORCE | ||||
| $(output-symdump): $(MODORDER) $(input-symdump) $(modules:.ko=$(prelink-ext).o) FORCE | ||||
| 	$(call if_changed,modpost) | ||||
| 
 | ||||
| targets += $(output-symdump) | ||||
|  |  | |||
|  | @ -56,6 +56,14 @@ modpost_link() | |||
| 		${KBUILD_VMLINUX_LIBS}				\ | ||||
| 		--end-group" | ||||
| 
 | ||||
| 	if [ -n "${CONFIG_LTO_CLANG}" ]; then | ||||
| 		# This might take a while, so indicate that we're doing | ||||
| 		# an LTO link | ||||
| 		info LTO ${1} | ||||
| 	else | ||||
| 		info LD ${1} | ||||
| 	fi | ||||
| 
 | ||||
| 	${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects} | ||||
| } | ||||
| 
 | ||||
|  | @ -103,6 +111,14 @@ vmlinux_link() | |||
| 	fi | ||||
| 
 | ||||
| 	if [ "${SRCARCH}" != "um" ]; then | ||||
| 		if [ -n "${CONFIG_LTO_CLANG}" ]; then | ||||
| 			# Use vmlinux.o instead of performing the slow LTO | ||||
| 			# link again. | ||||
| 			objects="--whole-archive		\ | ||||
| 				vmlinux.o 			\ | ||||
| 				--no-whole-archive		\ | ||||
| 				${@}" | ||||
| 		else | ||||
| 			objects="--whole-archive		\ | ||||
| 				${KBUILD_VMLINUX_OBJS}		\ | ||||
| 				--no-whole-archive		\ | ||||
|  | @ -110,6 +126,7 @@ vmlinux_link() | |||
| 				${KBUILD_VMLINUX_LIBS}		\ | ||||
| 				--end-group			\ | ||||
| 				${@}" | ||||
| 		fi | ||||
| 
 | ||||
| 		${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux}	\ | ||||
| 			${strip_debug#-Wl,}			\ | ||||
|  | @ -274,7 +291,6 @@ fi; | |||
| ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init need-builtin=1 | ||||
| 
 | ||||
| #link vmlinux.o | ||||
| info LD vmlinux.o | ||||
| modpost_link vmlinux.o | ||||
| objtool_link vmlinux.o | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Sami Tolvanen
						Sami Tolvanen