forked from mirrors/linux
		
	powerpc/string: Implement optimized memset variants
Based on Matthew Wilcox's patches for other architectures.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
		
							parent
							
								
									00e7c259e9
								
							
						
					
					
						commit
						694fc88ce2
					
				
					 2 changed files with 42 additions and 1 deletions
				
			
		| 
						 | 
					@ -23,6 +23,30 @@ extern void * memmove(void *,const void *,__kernel_size_t);
 | 
				
			||||||
extern int memcmp(const void *,const void *,__kernel_size_t);
 | 
					extern int memcmp(const void *,const void *,__kernel_size_t);
 | 
				
			||||||
extern void * memchr(const void *,int,__kernel_size_t);
 | 
					extern void * memchr(const void *,int,__kernel_size_t);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_PPC64
 | 
				
			||||||
 | 
					/*
					 * NOTE(review): the __HAVE_ARCH_MEMSET{16,32,64} macros presumably signal
					 * to generic code that this architecture provides these helpers itself;
					 * confirm against the generic string header.
					 */
					#define __HAVE_ARCH_MEMSET16
 | 
				
			||||||
 | 
					#define __HAVE_ARCH_MEMSET32
 | 
				
			||||||
 | 
					#define __HAVE_ARCH_MEMSET64
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Out-of-line fill routines. The inline memset16/32/64 wrappers in this
					 * header call them with the element count multiplied by 2, 4 and 8
					 * respectively, i.e. with the region size in bytes.
					 */
					extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
 | 
				
			||||||
 | 
					extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
 | 
				
			||||||
 | 
					extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * memset16() - fill @n 16-bit slots starting at @p with the pattern @v.
					 *
					 * __memset16() is handed the size of the region in bytes (element count
					 * times element size) rather than the element count. Whatever
					 * __memset16() returns is passed straight back to the caller.
					 */
					static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n)
					{
						__kernel_size_t nbytes = n * sizeof(*p);

						return __memset16(p, v, nbytes);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * memset32() - fill @n 32-bit slots starting at @p with the pattern @v.
					 *
					 * __memset32() is handed the size of the region in bytes (element count
					 * times element size) rather than the element count. Whatever
					 * __memset32() returns is passed straight back to the caller.
					 */
					static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n)
					{
						__kernel_size_t nbytes = n * sizeof(*p);

						return __memset32(p, v, nbytes);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * memset64() - fill @n 64-bit slots starting at @p with the pattern @v.
					 *
					 * __memset64() is handed the size of the region in bytes (element count
					 * times element size) rather than the element count. Whatever
					 * __memset64() returns is passed straight back to the caller.
					 */
					static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
					{
						__kernel_size_t nbytes = n * sizeof(*p);

						return __memset64(p, v, nbytes);
					}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
#endif /* __KERNEL__ */
 | 
					#endif /* __KERNEL__ */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif	/* _ASM_POWERPC_STRING_H */
 | 
					#endif	/* _ASM_POWERPC_STRING_H */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -13,6 +13,23 @@
 | 
				
			||||||
#include <asm/ppc_asm.h>
 | 
					#include <asm/ppc_asm.h>
 | 
				
			||||||
#include <asm/export.h>
 | 
					#include <asm/export.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Entry points for the 16-, 32- and 64-bit pattern fills.
					 *
					 * Each entry widens the pattern held in r4 and then falls through to
					 * the next wider one; __memset64 finally branches to the .Lms label
					 * inside memset, so memset's own replication of r4 is not executed
					 * for these entry points.
					 *
					 * NOTE(review): register roles are presumably r3 = destination,
					 * r4 = pattern, r5 = length in bytes, following the C prototypes;
					 * confirm against the ABI.
					 */
					_GLOBAL(__memset16)
 | 
				
			||||||
 | 
						rlwimi	r4,r4,16,0,15	/* copy low halfword of r4 into the upper half of its low word */
 | 
				
			||||||
 | 
						/* fall through into __memset32 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					_GLOBAL(__memset32)
 | 
				
			||||||
 | 
						rldimi	r4,r4,32,0	/* copy low word of r4 into its high word */
 | 
				
			||||||
 | 
						/* fall through into __memset64 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					_GLOBAL(__memset64)
 | 
				
			||||||
 | 
						neg	r0,r3
 | 
				
			||||||
 | 
						andi.	r0,r0,7		/* r0 = (-r3) & 7: bytes up to the next 8-byte boundary; sets cr0 */
 | 
				
			||||||
 | 
						cmplw	cr1,r5,r0	/* cr1 = unsigned compare of r5 against that byte count */
 | 
				
			||||||
 | 
						b	.Lms		/* continue in memset at .Lms */
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(__memset16)
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(__memset32)
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(__memset64)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
_GLOBAL(memset)
 | 
					_GLOBAL(memset)
 | 
				
			||||||
	neg	r0,r3
 | 
						neg	r0,r3
 | 
				
			||||||
	rlwimi	r4,r4,8,16,23
 | 
						rlwimi	r4,r4,8,16,23
 | 
				
			||||||
| 
						 | 
					@ -20,7 +37,7 @@ _GLOBAL(memset)
 | 
				
			||||||
	rlwimi	r4,r4,16,0,15
 | 
						rlwimi	r4,r4,16,0,15
 | 
				
			||||||
	cmplw	cr1,r5,r0		/* do we get that far? */
 | 
						cmplw	cr1,r5,r0		/* do we get that far? */
 | 
				
			||||||
	rldimi	r4,r4,32,0
 | 
						rldimi	r4,r4,32,0
 | 
				
			||||||
	PPC_MTOCRF(1,r0)
 | 
					.Lms:	PPC_MTOCRF(1,r0)
 | 
				
			||||||
	mr	r6,r3
 | 
						mr	r6,r3
 | 
				
			||||||
	blt	cr1,8f
 | 
						blt	cr1,8f
 | 
				
			||||||
	beq+	3f			/* if already 8-byte aligned */
 | 
						beq+	3f			/* if already 8-byte aligned */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue