	fs: introduce a per-cpu last_ino allocator
new_inode() dirties a contended cache line to get increasing inode numbers.
This limits performance on workloads that cause significant parallel inode
allocation.

Solve this problem by using a per_cpu variable fed by the shared last_ino in
batches of 1024 allocations. This reduces contention on the shared last_ino,
and gives the same spread of inode numbers as before (i.e. same wraparound
after 2^32 allocations).

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
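A quick check of the worst-case figure quoted in the comment added below: NR_CPUS * (LAST_INO_BATCH - 1) = 4096 * 1023 = 4,190,208 skipped inode numbers, and 4,190,208 / 2^32 ≈ 0.098% of the 32-bit range, matching the "~0.1%" estimate, so the wraparound period after 2^32 allocations is essentially unchanged.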
parent 7de9c6ee3e
commit f991bd2e14

1 changed file with 38 additions and 7 deletions

fs/inode.c | 45
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -717,6 +717,43 @@ static struct inode *find_inode_fast(struct super_block *sb,
 	return NULL;
 }
 
+/*
+ * Each cpu owns a range of LAST_INO_BATCH numbers.
+ * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
+ * to renew the exhausted range.
+ *
+ * This does not significantly increase overflow rate because every CPU can
+ * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
+ * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
+ * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
+ * overflow rate by 2x, which does not seem too significant.
+ *
+ * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
+ * error if st_ino won't fit in target struct field. Use 32bit counter
+ * here to attempt to avoid that.
+ */
+#define LAST_INO_BATCH 1024
+static DEFINE_PER_CPU(unsigned int, last_ino);
+
+static unsigned int get_next_ino(void)
+{
+	unsigned int *p = &get_cpu_var(last_ino);
+	unsigned int res = *p;
+
+#ifdef CONFIG_SMP
+	if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
+		static atomic_t shared_last_ino;
+		int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
+
+		res = next - LAST_INO_BATCH;
+	}
+#endif
+
+	*p = ++res;
+	put_cpu_var(last_ino);
+	return res;
+}
+
 /**
  *	new_inode 	- obtain an inode
  *	@sb: superblock
@@ -731,12 +768,6 @@ static struct inode *find_inode_fast(struct super_block *sb,
  */
 struct inode *new_inode(struct super_block *sb)
 {
-	/*
-	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
-	 * error if st_ino won't fit in target struct field. Use 32bit counter
-	 * here to attempt to avoid that.
-	 */
-	static unsigned int last_ino;
 	struct inode *inode;
 
 	spin_lock_prefetch(&inode_lock);
@@ -745,7 +776,7 @@ struct inode *new_inode(struct super_block *sb)
 	if (inode) {
 		spin_lock(&inode_lock);
 		__inode_sb_list_add(inode);
-		inode->i_ino = ++last_ino;
+		inode->i_ino = get_next_ino();
 		inode->i_state = 0;
 		spin_unlock(&inode_lock);
 	}
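As a rough userspace illustration of the batching scheme (a sketch only, not kernel code): C11 thread-local storage stands in for the per-CPU variable, atomic_fetch_add() plays the role of atomic_add_return(), and the names BATCH, NTHREADS, NALLOCS and next_ino() are invented for this example.

/*
 * Userspace model of the per-cpu batched allocator above.
 * Build with: cc -std=c11 -pthread model.c
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <threads.h>

#define BATCH    1024			/* plays the role of LAST_INO_BATCH */
#define NTHREADS 4
#define NALLOCS  100000

static atomic_uint shared_last_ino;		/* written once per BATCH allocations */
static thread_local unsigned int last_ino;	/* plays the role of the per-CPU counter */

static unsigned int next_ino(void)
{
	unsigned int res = last_ino;

	/* Local range exhausted (or never filled): grab a fresh batch. */
	if ((res & (BATCH - 1)) == 0) {
		/* fetch_add returns the old value; adding BATCH mimics atomic_add_return() */
		unsigned int next = atomic_fetch_add(&shared_last_ino, BATCH) + BATCH;

		res = next - BATCH;
	}

	last_ino = ++res;
	return res;
}

static int worker(void *arg)
{
	unsigned int ino = 0;

	for (int i = 0; i < NALLOCS; i++)
		ino = next_ino();
	printf("thread %ld: last ino allocated = %u\n", (long)(intptr_t)arg, ino);
	return 0;
}

int main(void)
{
	thrd_t tid[NTHREADS];

	for (long i = 0; i < NTHREADS; i++)
		thrd_create(&tid[i], worker, (void *)(intptr_t)i);
	for (int i = 0; i < NTHREADS; i++)
		thrd_join(tid[i], NULL);
	return 0;
}

Each thread walks through its own 1024-number window before touching shared_last_ino again, so the shared cache line is written once per 1024 allocations instead of once per allocation, which is exactly the contention this patch removes from new_inode().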