mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	vfs: use 'unsigned long' accesses for dcache name comparison and hashing
Ok, this is hacky, and only works on little-endian machines with good unaligned handling. And even then only with CONFIG_DEBUG_PAGEALLOC disabled, since it can access up to 7 bytes after the pathname. But it runs like a bat out of hell. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									9f8050c4f9
								
							
						
					
					
						commit
						bfcfaa77bd
					
				
					 4 changed files with 150 additions and 0 deletions
				
			
		| 
						 | 
					@ -82,6 +82,7 @@ config X86
 | 
				
			||||||
	select CLKEVT_I8253
 | 
						select CLKEVT_I8253
 | 
				
			||||||
	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 | 
						select ARCH_HAVE_NMI_SAFE_CMPXCHG
 | 
				
			||||||
	select GENERIC_IOMAP
 | 
						select GENERIC_IOMAP
 | 
				
			||||||
 | 
						select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC
 | 
				
			||||||
 | 
					
 | 
				
			||||||
config INSTRUCTION_DECODER
 | 
					config INSTRUCTION_DECODER
 | 
				
			||||||
	def_bool (KPROBES || PERF_EVENTS)
 | 
						def_bool (KPROBES || PERF_EVENTS)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,6 +4,10 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
menu "File systems"
 | 
					menu "File systems"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Use unaligned word dcache accesses
 | 
				
			||||||
 | 
					config DCACHE_WORD_ACCESS
 | 
				
			||||||
 | 
					       bool
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if BLOCK
 | 
					if BLOCK
 | 
				
			||||||
 | 
					
 | 
				
			||||||
source "fs/ext2/Kconfig"
 | 
					source "fs/ext2/Kconfig"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										23
									
								
								fs/dcache.c
									
									
									
									
									
								
							
							
						
						
									
										23
									
								
								fs/dcache.c
									
									
									
									
									
								
							| 
						 | 
					@ -144,6 +144,28 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
 | 
				
			||||||
static inline int dentry_cmp(const unsigned char *cs, size_t scount,
 | 
					static inline int dentry_cmp(const unsigned char *cs, size_t scount,
 | 
				
			||||||
				const unsigned char *ct, size_t tcount)
 | 
									const unsigned char *ct, size_t tcount)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					#ifdef CONFIG_DCACHE_WORD_ACCESS
 | 
				
			||||||
 | 
						unsigned long a,b,mask;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (unlikely(scount != tcount))
 | 
				
			||||||
 | 
							return 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (;;) {
 | 
				
			||||||
 | 
							a = *(unsigned long *)cs;
 | 
				
			||||||
 | 
							b = *(unsigned long *)ct;
 | 
				
			||||||
 | 
							if (tcount < sizeof(unsigned long))
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							if (unlikely(a != b))
 | 
				
			||||||
 | 
								return 1;
 | 
				
			||||||
 | 
							cs += sizeof(unsigned long);
 | 
				
			||||||
 | 
							ct += sizeof(unsigned long);
 | 
				
			||||||
 | 
							tcount -= sizeof(unsigned long);
 | 
				
			||||||
 | 
							if (!tcount)
 | 
				
			||||||
 | 
								return 0;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						mask = ~(~0ul << tcount*8);
 | 
				
			||||||
 | 
						return unlikely(!!((a ^ b) & mask));
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
	if (scount != tcount)
 | 
						if (scount != tcount)
 | 
				
			||||||
		return 1;
 | 
							return 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -155,6 +177,7 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
 | 
				
			||||||
		tcount--;
 | 
							tcount--;
 | 
				
			||||||
	} while (tcount);
 | 
						} while (tcount);
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void __d_free(struct rcu_head *head)
 | 
					static void __d_free(struct rcu_head *head)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										122
									
								
								fs/namei.c
									
									
									
									
									
								
							
							
						
						
									
										122
									
								
								fs/namei.c
									
									
									
									
									
								
							| 
						 | 
					@ -1374,6 +1374,126 @@ static inline int can_lookup(struct inode *inode)
 | 
				
			||||||
	return 1;
 | 
						return 1;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * We can do the critical dentry name comparison and hashing
 | 
				
			||||||
 | 
					 * operations one word at a time, but we are limited to:
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * - Architectures with fast unaligned word accesses. We could
 | 
				
			||||||
 | 
					 *   do a "get_unaligned()" if this helps and is sufficiently
 | 
				
			||||||
 | 
					 *   fast.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * - Little-endian machines (so that we can generate the mask
 | 
				
			||||||
 | 
					 *   of low bytes efficiently). Again, we *could* do a byte
 | 
				
			||||||
 | 
					 *   swapping load on big-endian architectures if that is not
 | 
				
			||||||
 | 
					 *   expensive enough to make the optimization worthless.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
 | 
				
			||||||
 | 
					 *   do not trap on the (extremely unlikely) case of a page
 | 
				
			||||||
 | 
					 *   crossing operation.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * - Furthermore, we need an efficient 64-bit compile for the
 | 
				
			||||||
 | 
					 *   64-bit case in order to generate the "number of bytes in
 | 
				
			||||||
 | 
					 *   the final mask". Again, that could be replaced with an
 | 
				
			||||||
 | 
					 *   efficient population count instruction or similar.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#ifdef CONFIG_DCACHE_WORD_ACCESS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_64BIT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Count the bytes covered by a low-order byte mask (64-bit case).
					 *
					 * @mask is expected to be one of 0, 0xff, 0xffff, ... i.e. a run
					 * of 0xff bytes starting at the least significant byte.  The
					 * multiplication accumulates one step per 0xff byte into the top
					 * byte of the product, so shifting it down by 56 yields the number
					 * of set bytes (0..7).
					 *
					 * Jan Achrenius on G+: microoptimized version of the simpler
					 * "(mask & ONEBYTES) * ONEBYTES >> 56" that works for the
					 * bytemasks without having to mask them first.
					 */
					static inline long count_masked_bytes(unsigned long mask)
					{
						const unsigned long spread = mask * 0x0001020304050608ul;
						return spread >> 56;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Reduce a word-sized hash to an unsigned int (64-bit case):
					 * the bits above the low int-sized half are added onto the
					 * value, and the sum is truncated by the return type.
					 */
					static inline unsigned int fold_hash(unsigned long hash)
					{
						const unsigned long upper = hash >> (8*sizeof(int));
						return hash + upper;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#else	/* 32-bit case */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Carl Chatfield / Jan Achrenius G+ version for 32-bit.
					 *
					 * Count the bytes covered by a low-order byte mask; @mask is
					 * expected to be one of 0x000000, 0x0000ff, 0x00ffff, 0xffffff.
					 */
					static inline long count_masked_bytes(long mask)
					{
						/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
						long biased = mask + 0x0ff0001;
						/* and-ing with the mask turns the 1 into 0 for the 000000 case */
						return (biased >> 23) & mask;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define fold_hash(x) (x)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Hash @len bytes starting at @name, one unsigned long at a time.
					 *
					 * Every full word is added to the running hash, which is then
					 * multiplied by 9 - the same "(hash + a) * 9" step hash_name()
					 * applies per full word - so both routines yield the same value
					 * for the same bytes.  Multiplying *before* the add (as this
					 * function used to) skips the multiply for the last full word when
					 * @len is an exact multiple of sizeof(unsigned long), and the two
					 * hashes then disagree.
					 *
					 * A trailing partial word is masked down to its low @len bytes
					 * (little-endian layout) before being added.  Note that the load
					 * always fetches a whole word, so it may read bytes past
					 * @name + @len; those bytes never reach the hash.
					 *
					 * Returns the hash folded to an unsigned int by fold_hash().
					 */
					unsigned int full_name_hash(const unsigned char *name, unsigned int len)
					{
						unsigned long a, mask;
						unsigned long hash = 0;

						for (;;) {
							a = *(const unsigned long *)name;
							if (len < sizeof(unsigned long))
								break;
							/* add first, then scale: matches hash_name() */
							hash += a;
							hash *= 9;
							name += sizeof(unsigned long);
							len -= sizeof(unsigned long);
							if (!len)
								goto done;
						}
						/* keep only the low 'len' bytes of the final word */
						mask = ~(~0ul << len*8);
						hash += mask & a;
					done:
						return fold_hash(hash);
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(full_name_hash);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Per-byte constants sized to 'unsigned long'.
					 *
					 * ~0ul / 0xff is 0x01 repeated in every byte of the word, whatever
					 * the word size, so these stay correct on 32-bit builds where a
					 * literal such as 0x0101010101010101ul does not fit the type.  On
					 * 64-bit they evaluate to exactly the same values as those literals.
					 */
					#define ONEBYTES	(~0ul / 0xff)
					#define SLASHBYTES	(ONEBYTES * 0x2ful)
					#define HIGHBITS	(ONEBYTES * 0x80ul)
					/*
					 * Detect a zero byte in @a.
					 *
					 * The result is non-zero if and only if some byte of @a is zero.
					 * Its lowest set bit is the high bit (0x80) of the first, i.e.
					 * least significant, zero byte.  Bits for bytes above that one may
					 * also be set because of borrow propagation in the subtraction
					 * (e.g. a 0x01 byte directly above a zero byte), so only the lowest
					 * set bit should be relied upon.
					 */
					static inline unsigned long has_zero(unsigned long a)
					{
						return ((a - ONEBYTES) & ~a) & HIGHBITS;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Calculate the length and hash of the path component, and
 | 
				
			||||||
 | 
					 * return the length of the component;
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static inline unsigned long hash_name(const char *name, unsigned int *hashp)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned long a, mask, hash, len;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						hash = a = 0;
 | 
				
			||||||
 | 
						len = -sizeof(unsigned long);
 | 
				
			||||||
 | 
						do {
 | 
				
			||||||
 | 
							hash = (hash + a) * 9;
 | 
				
			||||||
 | 
							len += sizeof(unsigned long);
 | 
				
			||||||
 | 
							a = *(unsigned long *)(name+len);
 | 
				
			||||||
 | 
							/* Do we have any NUL or '/' bytes in this word? */
 | 
				
			||||||
 | 
							mask = has_zero(a) | has_zero(a ^ SLASHBYTES);
 | 
				
			||||||
 | 
						} while (!mask);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* The mask *below* the first high bit set */
 | 
				
			||||||
 | 
						mask = (mask - 1) & ~mask;
 | 
				
			||||||
 | 
						mask >>= 7;
 | 
				
			||||||
 | 
						hash += a & mask;
 | 
				
			||||||
 | 
						*hashp = fold_hash(hash);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return len + count_masked_bytes(mask);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					
 | 
				
			||||||
unsigned int full_name_hash(const unsigned char *name, unsigned int len)
 | 
					unsigned int full_name_hash(const unsigned char *name, unsigned int len)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long hash = init_name_hash();
 | 
						unsigned long hash = init_name_hash();
 | 
				
			||||||
| 
						 | 
					@ -1402,6 +1522,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
 | 
				
			||||||
	return len;
 | 
						return len;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Name resolution.
 | 
					 * Name resolution.
 | 
				
			||||||
 * This is the basic name resolution function, turning a pathname into
 | 
					 * This is the basic name resolution function, turning a pathname into
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue