mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 16:48:26 +02:00 
			
		
		
		
	quota: remove dqptr_sem
Remove dqptr_sem to make quota code scalable: Remove the dqptr_sem, accessing inode->i_dquot now protected by dquot_srcu, and changing inode->i_dquot is now serialized by dq_data_lock. Signed-off-by: Lai Siyao <lai.siyao@intel.com> Signed-off-by: Niu Yawei <yawei.niu@intel.com> Signed-off-by: Jan Kara <jack@suse.cz>
This commit is contained in:
		
							parent
							
								
									9eb6463f31
								
							
						
					
					
						commit
						b9ba6f94b2
					
				
					 3 changed files with 49 additions and 67 deletions
				
			
		
							
								
								
									
										114
									
								
								fs/quota/dquot.c
									
									
									
									
									
								
							
							
						
						
									
										114
									
								
								fs/quota/dquot.c
									
									
									
									
									
								
							|  | @ -96,13 +96,16 @@ | |||
|  * Note that some things (eg. sb pointer, type, id) doesn't change during | ||||
|  * the life of the dquot structure and so needn't to be protected by a lock | ||||
|  * | ||||
|  * Any operation working on dquots via inode pointers must hold dqptr_sem.  If | ||||
|  * operation is just reading pointers from inode (or not using them at all) the | ||||
|  * read lock is enough. If pointers are altered function must hold write lock. | ||||
|  * Operation accessing dquots via inode pointers are protected by dquot_srcu. | ||||
|  * Operation of reading pointer needs srcu_read_lock(&dquot_srcu), and | ||||
|  * synchronize_srcu(&dquot_srcu) is called after clearing pointers from | ||||
|  * inode and before dropping dquot references to avoid use of dquots after | ||||
|  * they are freed. dq_data_lock is used to serialize the pointer setting and | ||||
|  * clearing operations. | ||||
|  * Special care needs to be taken about S_NOQUOTA inode flag (marking that | ||||
|  * inode is a quota file). Functions adding pointers from inode to dquots have | ||||
|  * to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they | ||||
|  * have to do all pointer modifications before dropping dqptr_sem. This makes | ||||
|  * to check this flag under dq_data_lock and then (if S_NOQUOTA is not set) they | ||||
|  * have to do all pointer modifications before dropping dq_data_lock. This makes | ||||
|  * sure they cannot race with quotaon which first sets S_NOQUOTA flag and | ||||
|  * then drops all pointers to dquots from an inode. | ||||
|  * | ||||
|  | @ -116,21 +119,15 @@ | |||
|  * spinlock to internal buffers before writing. | ||||
|  * | ||||
|  * Lock ordering (including related VFS locks) is the following: | ||||
|  *   dqonoff_mutex > i_mutex > journal_lock > dqptr_sem > dquot->dq_lock > | ||||
|  *   dqio_mutex | ||||
|  *   dqonoff_mutex > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex | ||||
|  * dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc. | ||||
|  * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem > | ||||
|  * dqptr_sem. But filesystem has to count with the fact that functions such as | ||||
|  * dquot_alloc_space() acquire dqptr_sem and they usually have to be called | ||||
|  * from inside a transaction to keep filesystem consistency after a crash. Also | ||||
|  * filesystems usually want to do some IO on dquot from ->mark_dirty which is | ||||
|  * called with dqptr_sem held. | ||||
|  */ | ||||
| 
 | ||||
| static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock); | ||||
| static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock); | ||||
| __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); | ||||
| EXPORT_SYMBOL(dq_data_lock); | ||||
| DEFINE_STATIC_SRCU(dquot_srcu); | ||||
| 
 | ||||
| void __quota_error(struct super_block *sb, const char *func, | ||||
| 		   const char *fmt, ...) | ||||
|  | @ -964,7 +961,6 @@ static void add_dquot_ref(struct super_block *sb, int type) | |||
| /*
 | ||||
|  * Remove references to dquots from inode and add dquot to list for freeing | ||||
|  * if we have the last reference to dquot | ||||
|  * We can't race with anybody because we hold dqptr_sem for writing... | ||||
|  */ | ||||
| static void remove_inode_dquot_ref(struct inode *inode, int type, | ||||
| 				   struct list_head *tofree_head) | ||||
|  | @ -1024,13 +1020,15 @@ static void remove_dquot_ref(struct super_block *sb, int type, | |||
| 		 *  We have to scan also I_NEW inodes because they can already | ||||
| 		 *  have quota pointer initialized. Luckily, we need to touch | ||||
| 		 *  only quota pointers and these have separate locking | ||||
| 		 *  (dqptr_sem). | ||||
| 		 *  (dq_data_lock). | ||||
| 		 */ | ||||
| 		spin_lock(&dq_data_lock); | ||||
| 		if (!IS_NOQUOTA(inode)) { | ||||
| 			if (unlikely(inode_get_rsv_space(inode) > 0)) | ||||
| 				reserved = 1; | ||||
| 			remove_inode_dquot_ref(inode, type, tofree_head); | ||||
| 		} | ||||
| 		spin_unlock(&dq_data_lock); | ||||
| 	} | ||||
| 	spin_unlock(&inode_sb_list_lock); | ||||
| #ifdef CONFIG_QUOTA_DEBUG | ||||
|  | @ -1048,9 +1046,8 @@ static void drop_dquot_ref(struct super_block *sb, int type) | |||
| 	LIST_HEAD(tofree_head); | ||||
| 
 | ||||
| 	if (sb->dq_op) { | ||||
| 		down_write(&sb_dqopt(sb)->dqptr_sem); | ||||
| 		remove_dquot_ref(sb, type, &tofree_head); | ||||
| 		up_write(&sb_dqopt(sb)->dqptr_sem); | ||||
| 		synchronize_srcu(&dquot_srcu); | ||||
| 		put_dquot_list(&tofree_head); | ||||
| 	} | ||||
| } | ||||
|  | @ -1381,9 +1378,6 @@ static int dquot_active(const struct inode *inode) | |||
| /*
 | ||||
|  * Initialize quota pointers in inode | ||||
|  * | ||||
|  * We do things in a bit complicated way but by that we avoid calling | ||||
|  * dqget() and thus filesystem callbacks under dqptr_sem. | ||||
|  * | ||||
|  * It is better to call this function outside of any transaction as it | ||||
|  * might need a lot of space in journal for dquot structure allocation. | ||||
|  */ | ||||
|  | @ -1394,8 +1388,6 @@ static void __dquot_initialize(struct inode *inode, int type) | |||
| 	struct super_block *sb = inode->i_sb; | ||||
| 	qsize_t rsv; | ||||
| 
 | ||||
| 	/* First test before acquiring mutex - solves deadlocks when we
 | ||||
|          * re-enter the quota code and are already holding the mutex */ | ||||
| 	if (!dquot_active(inode)) | ||||
| 		return; | ||||
| 
 | ||||
|  | @ -1429,7 +1421,7 @@ static void __dquot_initialize(struct inode *inode, int type) | |||
| 	if (!init_needed) | ||||
| 		return; | ||||
| 
 | ||||
| 	down_write(&sb_dqopt(sb)->dqptr_sem); | ||||
| 	spin_lock(&dq_data_lock); | ||||
| 	if (IS_NOQUOTA(inode)) | ||||
| 		goto out_err; | ||||
| 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | ||||
|  | @ -1449,15 +1441,12 @@ static void __dquot_initialize(struct inode *inode, int type) | |||
| 			 * did a write before quota was turned on | ||||
| 			 */ | ||||
| 			rsv = inode_get_rsv_space(inode); | ||||
| 			if (unlikely(rsv)) { | ||||
| 				spin_lock(&dq_data_lock); | ||||
| 			if (unlikely(rsv)) | ||||
| 				dquot_resv_space(inode->i_dquot[cnt], rsv); | ||||
| 				spin_unlock(&dq_data_lock); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| out_err: | ||||
| 	up_write(&sb_dqopt(sb)->dqptr_sem); | ||||
| 	spin_unlock(&dq_data_lock); | ||||
| 	/* Drop unused references */ | ||||
| 	dqput_all(got); | ||||
| } | ||||
|  | @ -1469,19 +1458,24 @@ void dquot_initialize(struct inode *inode) | |||
| EXPORT_SYMBOL(dquot_initialize); | ||||
| 
 | ||||
| /*
 | ||||
|  * 	Release all quotas referenced by inode | ||||
|  * Release all quotas referenced by inode. | ||||
|  * | ||||
|  * This function only be called on inode free or converting | ||||
|  * a file to quota file, no other users for the i_dquot in | ||||
|  * both cases, so we needn't call synchronize_srcu() after | ||||
|  * clearing i_dquot. | ||||
|  */ | ||||
| static void __dquot_drop(struct inode *inode) | ||||
| { | ||||
| 	int cnt; | ||||
| 	struct dquot *put[MAXQUOTAS]; | ||||
| 
 | ||||
| 	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 	spin_lock(&dq_data_lock); | ||||
| 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | ||||
| 		put[cnt] = inode->i_dquot[cnt]; | ||||
| 		inode->i_dquot[cnt] = NULL; | ||||
| 	} | ||||
| 	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 	spin_unlock(&dq_data_lock); | ||||
| 	dqput_all(put); | ||||
| } | ||||
| 
 | ||||
|  | @ -1599,15 +1593,11 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve) | |||
|  */ | ||||
| int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) | ||||
| { | ||||
| 	int cnt, ret = 0; | ||||
| 	int cnt, ret = 0, index; | ||||
| 	struct dquot_warn warn[MAXQUOTAS]; | ||||
| 	struct dquot **dquots = inode->i_dquot; | ||||
| 	int reserve = flags & DQUOT_SPACE_RESERVE; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * First test before acquiring mutex - solves deadlocks when we | ||||
| 	 * re-enter the quota code and are already holding the mutex | ||||
| 	 */ | ||||
| 	if (!dquot_active(inode)) { | ||||
| 		inode_incr_space(inode, number, reserve); | ||||
| 		goto out; | ||||
|  | @ -1616,7 +1606,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) | |||
| 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) | ||||
| 		warn[cnt].w_type = QUOTA_NL_NOWARN; | ||||
| 
 | ||||
| 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 	index = srcu_read_lock(&dquot_srcu); | ||||
| 	spin_lock(&dq_data_lock); | ||||
| 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | ||||
| 		if (!dquots[cnt]) | ||||
|  | @ -1643,7 +1633,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) | |||
| 		goto out_flush_warn; | ||||
| 	mark_all_dquot_dirty(dquots); | ||||
| out_flush_warn: | ||||
| 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 	srcu_read_unlock(&dquot_srcu, index); | ||||
| 	flush_warnings(warn); | ||||
| out: | ||||
| 	return ret; | ||||
|  | @ -1655,17 +1645,16 @@ EXPORT_SYMBOL(__dquot_alloc_space); | |||
|  */ | ||||
| int dquot_alloc_inode(const struct inode *inode) | ||||
| { | ||||
| 	int cnt, ret = 0; | ||||
| 	int cnt, ret = 0, index; | ||||
| 	struct dquot_warn warn[MAXQUOTAS]; | ||||
| 	struct dquot * const *dquots = inode->i_dquot; | ||||
| 
 | ||||
| 	/* First test before acquiring mutex - solves deadlocks when we
 | ||||
|          * re-enter the quota code and are already holding the mutex */ | ||||
| 	if (!dquot_active(inode)) | ||||
| 		return 0; | ||||
| 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) | ||||
| 		warn[cnt].w_type = QUOTA_NL_NOWARN; | ||||
| 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 
 | ||||
| 	index = srcu_read_lock(&dquot_srcu); | ||||
| 	spin_lock(&dq_data_lock); | ||||
| 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | ||||
| 		if (!dquots[cnt]) | ||||
|  | @ -1685,7 +1674,7 @@ int dquot_alloc_inode(const struct inode *inode) | |||
| 	spin_unlock(&dq_data_lock); | ||||
| 	if (ret == 0) | ||||
| 		mark_all_dquot_dirty(dquots); | ||||
| 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 	srcu_read_unlock(&dquot_srcu, index); | ||||
| 	flush_warnings(warn); | ||||
| 	return ret; | ||||
| } | ||||
|  | @ -1696,14 +1685,14 @@ EXPORT_SYMBOL(dquot_alloc_inode); | |||
|  */ | ||||
| int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) | ||||
| { | ||||
| 	int cnt; | ||||
| 	int cnt, index; | ||||
| 
 | ||||
| 	if (!dquot_active(inode)) { | ||||
| 		inode_claim_rsv_space(inode, number); | ||||
| 		return 0; | ||||
| 	} | ||||
| 
 | ||||
| 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 	index = srcu_read_lock(&dquot_srcu); | ||||
| 	spin_lock(&dq_data_lock); | ||||
| 	/* Claim reserved quotas to allocated quotas */ | ||||
| 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | ||||
|  | @ -1715,7 +1704,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) | |||
| 	inode_claim_rsv_space(inode, number); | ||||
| 	spin_unlock(&dq_data_lock); | ||||
| 	mark_all_dquot_dirty(inode->i_dquot); | ||||
| 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 	srcu_read_unlock(&dquot_srcu, index); | ||||
| 	return 0; | ||||
| } | ||||
| EXPORT_SYMBOL(dquot_claim_space_nodirty); | ||||
|  | @ -1725,14 +1714,14 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty); | |||
|  */ | ||||
| void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) | ||||
| { | ||||
| 	int cnt; | ||||
| 	int cnt, index; | ||||
| 
 | ||||
| 	if (!dquot_active(inode)) { | ||||
| 		inode_reclaim_rsv_space(inode, number); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 	index = srcu_read_lock(&dquot_srcu); | ||||
| 	spin_lock(&dq_data_lock); | ||||
| 	/* Claim reserved quotas to allocated quotas */ | ||||
| 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | ||||
|  | @ -1744,7 +1733,7 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) | |||
| 	inode_reclaim_rsv_space(inode, number); | ||||
| 	spin_unlock(&dq_data_lock); | ||||
| 	mark_all_dquot_dirty(inode->i_dquot); | ||||
| 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 	srcu_read_unlock(&dquot_srcu, index); | ||||
| 	return; | ||||
| } | ||||
| EXPORT_SYMBOL(dquot_reclaim_space_nodirty); | ||||
|  | @ -1757,16 +1746,14 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) | |||
| 	unsigned int cnt; | ||||
| 	struct dquot_warn warn[MAXQUOTAS]; | ||||
| 	struct dquot **dquots = inode->i_dquot; | ||||
| 	int reserve = flags & DQUOT_SPACE_RESERVE; | ||||
| 	int reserve = flags & DQUOT_SPACE_RESERVE, index; | ||||
| 
 | ||||
| 	/* First test before acquiring mutex - solves deadlocks when we
 | ||||
|          * re-enter the quota code and are already holding the mutex */ | ||||
| 	if (!dquot_active(inode)) { | ||||
| 		inode_decr_space(inode, number, reserve); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 	index = srcu_read_lock(&dquot_srcu); | ||||
| 	spin_lock(&dq_data_lock); | ||||
| 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | ||||
| 		int wtype; | ||||
|  | @ -1789,7 +1776,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) | |||
| 		goto out_unlock; | ||||
| 	mark_all_dquot_dirty(dquots); | ||||
| out_unlock: | ||||
| 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 	srcu_read_unlock(&dquot_srcu, index); | ||||
| 	flush_warnings(warn); | ||||
| } | ||||
| EXPORT_SYMBOL(__dquot_free_space); | ||||
|  | @ -1802,13 +1789,12 @@ void dquot_free_inode(const struct inode *inode) | |||
| 	unsigned int cnt; | ||||
| 	struct dquot_warn warn[MAXQUOTAS]; | ||||
| 	struct dquot * const *dquots = inode->i_dquot; | ||||
| 	int index; | ||||
| 
 | ||||
| 	/* First test before acquiring mutex - solves deadlocks when we
 | ||||
|          * re-enter the quota code and are already holding the mutex */ | ||||
| 	if (!dquot_active(inode)) | ||||
| 		return; | ||||
| 
 | ||||
| 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 	index = srcu_read_lock(&dquot_srcu); | ||||
| 	spin_lock(&dq_data_lock); | ||||
| 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) { | ||||
| 		int wtype; | ||||
|  | @ -1823,7 +1809,7 @@ void dquot_free_inode(const struct inode *inode) | |||
| 	} | ||||
| 	spin_unlock(&dq_data_lock); | ||||
| 	mark_all_dquot_dirty(dquots); | ||||
| 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 	srcu_read_unlock(&dquot_srcu, index); | ||||
| 	flush_warnings(warn); | ||||
| } | ||||
| EXPORT_SYMBOL(dquot_free_inode); | ||||
|  | @ -1837,6 +1823,8 @@ EXPORT_SYMBOL(dquot_free_inode); | |||
|  * This operation can block, but only after everything is updated | ||||
|  * A transaction must be started when entering this function. | ||||
|  * | ||||
|  * We are holding reference on transfer_from & transfer_to, no need to | ||||
|  * protect them by srcu_read_lock(). | ||||
|  */ | ||||
| int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) | ||||
| { | ||||
|  | @ -1849,8 +1837,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) | |||
| 	struct dquot_warn warn_from_inodes[MAXQUOTAS]; | ||||
| 	struct dquot_warn warn_from_space[MAXQUOTAS]; | ||||
| 
 | ||||
| 	/* First test before acquiring mutex - solves deadlocks when we
 | ||||
|          * re-enter the quota code and are already holding the mutex */ | ||||
| 	if (IS_NOQUOTA(inode)) | ||||
| 		return 0; | ||||
| 	/* Initialize the arrays */ | ||||
|  | @ -1859,12 +1845,12 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) | |||
| 		warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN; | ||||
| 		warn_from_space[cnt].w_type = QUOTA_NL_NOWARN; | ||||
| 	} | ||||
| 	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 
 | ||||
| 	spin_lock(&dq_data_lock); | ||||
| 	if (IS_NOQUOTA(inode)) {	/* File without quota accounting? */ | ||||
| 		up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 		spin_unlock(&dq_data_lock); | ||||
| 		return 0; | ||||
| 	} | ||||
| 	spin_lock(&dq_data_lock); | ||||
| 	cur_space = inode_get_bytes(inode); | ||||
| 	rsv_space = inode_get_rsv_space(inode); | ||||
| 	space = cur_space + rsv_space; | ||||
|  | @ -1918,7 +1904,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) | |||
| 		inode->i_dquot[cnt] = transfer_to[cnt]; | ||||
| 	} | ||||
| 	spin_unlock(&dq_data_lock); | ||||
| 	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 
 | ||||
| 	mark_all_dquot_dirty(transfer_from); | ||||
| 	mark_all_dquot_dirty(transfer_to); | ||||
|  | @ -1932,7 +1917,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) | |||
| 	return 0; | ||||
| over_quota: | ||||
| 	spin_unlock(&dq_data_lock); | ||||
| 	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); | ||||
| 	flush_warnings(warn_to); | ||||
| 	return ret; | ||||
| } | ||||
|  |  | |||
|  | @ -218,7 +218,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
| 	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); | ||||
| 	mutex_init(&s->s_dquot.dqio_mutex); | ||||
| 	mutex_init(&s->s_dquot.dqonoff_mutex); | ||||
| 	init_rwsem(&s->s_dquot.dqptr_sem); | ||||
| 	s->s_maxbytes = MAX_NON_LFS; | ||||
| 	s->s_op = &default_op; | ||||
| 	s->s_time_gran = 1000000000; | ||||
|  |  | |||
|  | @ -390,7 +390,6 @@ struct quota_info { | |||
| 	unsigned int flags;			/* Flags for diskquotas on this device */ | ||||
| 	struct mutex dqio_mutex;		/* lock device while I/O in progress */ | ||||
| 	struct mutex dqonoff_mutex;		/* Serialize quotaon & quotaoff */ | ||||
| 	struct rw_semaphore dqptr_sem;		/* serialize ops using quota_info struct, pointers from inode to dquots */ | ||||
| 	struct inode *files[MAXQUOTAS];		/* inodes of quotafiles */ | ||||
| 	struct mem_dqinfo info[MAXQUOTAS];	/* Information for each quota type */ | ||||
| 	const struct quota_format_ops *ops[MAXQUOTAS];	/* Operations for each type */ | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Niu Yawei
						Niu Yawei