Mirror of https://github.com/torvalds/linux.git, synced 2025-11-04 02:30:34 +02:00
			
		
		
		
	The __memzero assembly code is almost identical to memset's except for two orr instructions. The runtime performance of __memzero(p, n) and memset(p, 0, n) is accordingly almost identical. However, the memset() macro used to guard against a zero length and to call __memzero at compile time when the fill value is a constant zero interferes with compiler optimizations. Arnd found that the test against a zero length brings up some new warnings with gcc v8: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82103 And successively removing the test against a zero length and the call to __memzero optimization produces the following kernel sizes for defconfig with gcc 6: text data bss dec hex filename 12248142 6278960 413588 18940690 1210312 vmlinux.orig 12244474 6278960 413588 18937022 120f4be vmlinux.no_zero_test 12239160 6278960 413588 18931708 120dffc vmlinux.no_memzero So it is probably not worth keeping __memzero around given that the compiler can do a better job at inlining trivial memset(p,0,n) on its own. And the memset code already handles a zero length just fine. Suggested-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Nicolas Pitre <nico@linaro.org> Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
		
			
				
	
	
		
			45 lines
		
	
	
	
		
			1.4 KiB
		
	
	
	
		
			Makefile
		
	
	
	
	
	
			
		
		
	
	
			45 lines
		
	
	
	
		
			1.4 KiB
		
	
	
	
		
			Makefile
		
	
	
	
	
	
# SPDX-License-Identifier: GPL-2.0
#
# linux/arch/arm/lib/Makefile
#
# Copyright (C) 1995-2000 Russell King
#

# Helper routines always built into the kernel library (lib.a):
# bit operations, IP checksumming, string/memory primitives, delay
# loops, 64-bit arithmetic helpers and MMIO string accessors.
lib-y		:= backtrace.o changebit.o csumipv6.o csumpartial.o   \
		   csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
		   delay.o delay-loop.o findbit.o memchr.o memcpy.o   \
		   memmove.o memset.o setbit.o                        \
		   strchr.o strrchr.o                                 \
		   testchangebit.o testclearbit.o testsetbit.o        \
		   ashldi3.o ashrdi3.o lshrdi3.o muldi3.o             \
		   ucmpdi2.o lib1funcs.o div64.o                      \
		   io-readsb.o io-writesb.o io-readsl.o io-writesl.o  \
		   call_with_stack.o bswapsdi2.o

# User-space access helpers, only added to lib-y when an MMU is
# configured (see lib-$(CONFIG_MMU) below).
mmu-y		:= clear_user.o copy_page.o getuser.o putuser.o       \
		   copy_from_user.o copy_to_user.o

# using lib_ here won't override already available weak symbols
obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o

lib-$(CONFIG_MMU) += $(mmu-y)

# Select the 16-bit MMIO string accessors matching the CPU
# architecture level.
ifeq ($(CONFIG_CPU_32v3),y)
  lib-y	+= io-readsw-armv3.o io-writesw-armv3.o
else
  lib-y	+= io-readsw-armv4.o io-writesw-armv4.o
endif

# Acorn RiscPC-specific helpers; delay-loop.S is assembled for ARMv4
# on this platform via a per-file assembler flag.
ifeq ($(CONFIG_ARCH_RPC),y)
  lib-y				+= ecard.o io-acorn.o floppydma.o
  AFLAGS_delay-loop.o		+= -march=armv4
endif

# Both checksum-copy objects include the shared generic template, so
# make them depend on it explicitly to trigger rebuilds when it changes.
$(obj)/csumpartialcopy.o:	$(obj)/csumpartialcopygeneric.S
$(obj)/csumpartialcopyuser.o:	$(obj)/csumpartialcopygeneric.S

# NEON-accelerated XOR: compile xor-neon.c with NEON enabled and a
# softfp float ABI, and only build it when XOR_BLOCKS is configured.
ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
  NEON_FLAGS			:= -mfloat-abi=softfp -mfpu=neon
  CFLAGS_xor-neon.o		+= $(NEON_FLAGS)
  obj-$(CONFIG_XOR_BLOCKS)	+= xor-neon.o
endif