mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	ext4: reduce contention on s_orphan_lock
Shuffle code around in ext4_orphan_add() and ext4_orphan_del() so that we avoid taking the global s_orphan_lock in some cases and hold it for a shorter time in other cases.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
		
							parent
							
								
									cd2c080c33
								
							
						
					
					
						commit
						d745a8c20c
					
				
					 1 changed files with 65 additions and 44 deletions
				
			
		
							
								
								
									
										109
									
								
								fs/ext4/namei.c
									
									
									
									
									
								
							
							
						
						
									
										109
									
								
								fs/ext4/namei.c
									
									
									
									
									
								
							| 
						 | 
					@ -2540,13 +2540,17 @@ static int empty_dir(struct inode *inode)
 | 
				
			||||||
	return 1;
 | 
						return 1;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* ext4_orphan_add() links an unlinked or truncated inode into a list of
 | 
					/*
 | 
				
			||||||
 | 
					 * ext4_orphan_add() links an unlinked or truncated inode into a list of
 | 
				
			||||||
 * such inodes, starting at the superblock, in case we crash before the
 | 
					 * such inodes, starting at the superblock, in case we crash before the
 | 
				
			||||||
 * file is closed/deleted, or in case the inode truncate spans multiple
 | 
					 * file is closed/deleted, or in case the inode truncate spans multiple
 | 
				
			||||||
 * transactions and the last transaction is not recovered after a crash.
 | 
					 * transactions and the last transaction is not recovered after a crash.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * At filesystem recovery time, we walk this list deleting unlinked
 | 
					 * At filesystem recovery time, we walk this list deleting unlinked
 | 
				
			||||||
 * inodes and truncating linked inodes in ext4_orphan_cleanup().
 | 
					 * inodes and truncating linked inodes in ext4_orphan_cleanup().
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Orphan list manipulation functions must be called under i_mutex unless
 | 
				
			||||||
 | 
					 * we are just creating the inode or deleting it.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
int ext4_orphan_add(handle_t *handle, struct inode *inode)
 | 
					int ext4_orphan_add(handle_t *handle, struct inode *inode)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -2554,13 +2558,19 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 | 
				
			||||||
	struct ext4_sb_info *sbi = EXT4_SB(sb);
 | 
						struct ext4_sb_info *sbi = EXT4_SB(sb);
 | 
				
			||||||
	struct ext4_iloc iloc;
 | 
						struct ext4_iloc iloc;
 | 
				
			||||||
	int err = 0, rc;
 | 
						int err = 0, rc;
 | 
				
			||||||
 | 
						bool dirty = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!sbi->s_journal)
 | 
						if (!sbi->s_journal)
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mutex_lock(&sbi->s_orphan_lock);
 | 
						WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
 | 
				
			||||||
 | 
							     !mutex_is_locked(&inode->i_mutex));
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Exit early if inode already is on orphan list. This is a big speedup
 | 
				
			||||||
 | 
						 * since we don't have to contend on the global s_orphan_lock.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
	if (!list_empty(&EXT4_I(inode)->i_orphan))
 | 
						if (!list_empty(&EXT4_I(inode)->i_orphan))
 | 
				
			||||||
		goto out_unlock;
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Orphan handling is only valid for files with data blocks
 | 
						 * Orphan handling is only valid for files with data blocks
 | 
				
			||||||
| 
						 | 
					@ -2574,44 +2584,47 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 | 
				
			||||||
	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
 | 
						BUFFER_TRACE(sbi->s_sbh, "get_write_access");
 | 
				
			||||||
	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
 | 
						err = ext4_journal_get_write_access(handle, sbi->s_sbh);
 | 
				
			||||||
	if (err)
 | 
						if (err)
 | 
				
			||||||
		goto out_unlock;
 | 
							goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	err = ext4_reserve_inode_write(handle, inode, &iloc);
 | 
						err = ext4_reserve_inode_write(handle, inode, &iloc);
 | 
				
			||||||
	if (err)
 | 
						if (err)
 | 
				
			||||||
		goto out_unlock;
 | 
							goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						mutex_lock(&sbi->s_orphan_lock);
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Due to previous errors inode may be already a part of on-disk
 | 
						 * Due to previous errors inode may be already a part of on-disk
 | 
				
			||||||
	 * orphan list. If so skip on-disk list modification.
 | 
						 * orphan list. If so skip on-disk list modification.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <=
 | 
						if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) >
 | 
				
			||||||
		(le32_to_cpu(sbi->s_es->s_inodes_count)))
 | 
						    (le32_to_cpu(sbi->s_es->s_inodes_count))) {
 | 
				
			||||||
			goto mem_insert;
 | 
							/* Insert this inode at the head of the on-disk orphan list */
 | 
				
			||||||
 | 
							NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
 | 
				
			||||||
	/* Insert this inode at the head of the on-disk orphan list... */
 | 
							sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
 | 
				
			||||||
	NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
 | 
							dirty = true;
 | 
				
			||||||
	sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
 | 
						}
 | 
				
			||||||
	err = ext4_handle_dirty_super(handle, sb);
 | 
						list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
 | 
				
			||||||
	rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
 | 
						mutex_unlock(&sbi->s_orphan_lock);
 | 
				
			||||||
	if (!err)
 | 
					 | 
				
			||||||
		err = rc;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* Only add to the head of the in-memory list if all the
 | 
					 | 
				
			||||||
	 * previous operations succeeded.  If the orphan_add is going to
 | 
					 | 
				
			||||||
	 * fail (possibly taking the journal offline), we can't risk
 | 
					 | 
				
			||||||
	 * leaving the inode on the orphan list: stray orphan-list
 | 
					 | 
				
			||||||
	 * entries can cause panics at unmount time.
 | 
					 | 
				
			||||||
	 *
 | 
					 | 
				
			||||||
	 * This is safe: on error we're going to ignore the orphan list
 | 
					 | 
				
			||||||
	 * anyway on the next recovery. */
 | 
					 | 
				
			||||||
mem_insert:
 | 
					 | 
				
			||||||
	if (!err)
 | 
					 | 
				
			||||||
		list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (dirty) {
 | 
				
			||||||
 | 
							err = ext4_handle_dirty_super(handle, sb);
 | 
				
			||||||
 | 
							rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
 | 
				
			||||||
 | 
							if (!err)
 | 
				
			||||||
 | 
								err = rc;
 | 
				
			||||||
 | 
							if (err) {
 | 
				
			||||||
 | 
								/*
 | 
				
			||||||
 | 
								 * We have to remove inode from in-memory list if
 | 
				
			||||||
 | 
								 * addition to on disk orphan list failed. Stray orphan
 | 
				
			||||||
 | 
								 * list entries can cause panics at unmount time.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								mutex_lock(&sbi->s_orphan_lock);
 | 
				
			||||||
 | 
								list_del(&EXT4_I(inode)->i_orphan);
 | 
				
			||||||
 | 
								mutex_unlock(&sbi->s_orphan_lock);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
 | 
						jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
 | 
				
			||||||
	jbd_debug(4, "orphan inode %lu will point to %d\n",
 | 
						jbd_debug(4, "orphan inode %lu will point to %d\n",
 | 
				
			||||||
			inode->i_ino, NEXT_ORPHAN(inode));
 | 
								inode->i_ino, NEXT_ORPHAN(inode));
 | 
				
			||||||
out_unlock:
 | 
					out:
 | 
				
			||||||
	mutex_unlock(&sbi->s_orphan_lock);
 | 
					 | 
				
			||||||
	ext4_std_error(sb, err);
 | 
						ext4_std_error(sb, err);
 | 
				
			||||||
	return err;
 | 
						return err;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -2632,35 +2645,43 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
 | 
				
			||||||
	if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS))
 | 
						if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS))
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mutex_lock(&sbi->s_orphan_lock);
 | 
						WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
 | 
				
			||||||
 | 
							     !mutex_is_locked(&inode->i_mutex));
 | 
				
			||||||
 | 
						/* Do this quick check before taking global s_orphan_lock. */
 | 
				
			||||||
	if (list_empty(&ei->i_orphan))
 | 
						if (list_empty(&ei->i_orphan))
 | 
				
			||||||
		goto out;
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ino_next = NEXT_ORPHAN(inode);
 | 
						if (handle) {
 | 
				
			||||||
	prev = ei->i_orphan.prev;
 | 
							/* Grab inode buffer early before taking global s_orphan_lock */
 | 
				
			||||||
 | 
							err = ext4_reserve_inode_write(handle, inode, &iloc);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						mutex_lock(&sbi->s_orphan_lock);
 | 
				
			||||||
	jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
 | 
						jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						prev = ei->i_orphan.prev;
 | 
				
			||||||
	list_del_init(&ei->i_orphan);
 | 
						list_del_init(&ei->i_orphan);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* If we're on an error path, we may not have a valid
 | 
						/* If we're on an error path, we may not have a valid
 | 
				
			||||||
	 * transaction handle with which to update the orphan list on
 | 
						 * transaction handle with which to update the orphan list on
 | 
				
			||||||
	 * disk, but we still need to remove the inode from the linked
 | 
						 * disk, but we still need to remove the inode from the linked
 | 
				
			||||||
	 * list in memory. */
 | 
						 * list in memory. */
 | 
				
			||||||
	if (!handle)
 | 
						if (!handle || err) {
 | 
				
			||||||
		goto out;
 | 
							mutex_unlock(&sbi->s_orphan_lock);
 | 
				
			||||||
 | 
					 | 
				
			||||||
	err = ext4_reserve_inode_write(handle, inode, &iloc);
 | 
					 | 
				
			||||||
	if (err)
 | 
					 | 
				
			||||||
		goto out_err;
 | 
							goto out_err;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ino_next = NEXT_ORPHAN(inode);
 | 
				
			||||||
	if (prev == &sbi->s_orphan) {
 | 
						if (prev == &sbi->s_orphan) {
 | 
				
			||||||
		jbd_debug(4, "superblock will point to %u\n", ino_next);
 | 
							jbd_debug(4, "superblock will point to %u\n", ino_next);
 | 
				
			||||||
		BUFFER_TRACE(sbi->s_sbh, "get_write_access");
 | 
							BUFFER_TRACE(sbi->s_sbh, "get_write_access");
 | 
				
			||||||
		err = ext4_journal_get_write_access(handle, sbi->s_sbh);
 | 
							err = ext4_journal_get_write_access(handle, sbi->s_sbh);
 | 
				
			||||||
		if (err)
 | 
							if (err) {
 | 
				
			||||||
 | 
								mutex_unlock(&sbi->s_orphan_lock);
 | 
				
			||||||
			goto out_brelse;
 | 
								goto out_brelse;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
 | 
							sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
 | 
				
			||||||
 | 
							mutex_unlock(&sbi->s_orphan_lock);
 | 
				
			||||||
		err = ext4_handle_dirty_super(handle, inode->i_sb);
 | 
							err = ext4_handle_dirty_super(handle, inode->i_sb);
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		struct ext4_iloc iloc2;
 | 
							struct ext4_iloc iloc2;
 | 
				
			||||||
| 
						 | 
					@ -2670,20 +2691,20 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
 | 
				
			||||||
		jbd_debug(4, "orphan inode %lu will point to %u\n",
 | 
							jbd_debug(4, "orphan inode %lu will point to %u\n",
 | 
				
			||||||
			  i_prev->i_ino, ino_next);
 | 
								  i_prev->i_ino, ino_next);
 | 
				
			||||||
		err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
 | 
							err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
 | 
				
			||||||
		if (err)
 | 
							if (err) {
 | 
				
			||||||
 | 
								mutex_unlock(&sbi->s_orphan_lock);
 | 
				
			||||||
			goto out_brelse;
 | 
								goto out_brelse;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
		NEXT_ORPHAN(i_prev) = ino_next;
 | 
							NEXT_ORPHAN(i_prev) = ino_next;
 | 
				
			||||||
		err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
 | 
							err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
 | 
				
			||||||
 | 
							mutex_unlock(&sbi->s_orphan_lock);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if (err)
 | 
						if (err)
 | 
				
			||||||
		goto out_brelse;
 | 
							goto out_brelse;
 | 
				
			||||||
	NEXT_ORPHAN(inode) = 0;
 | 
						NEXT_ORPHAN(inode) = 0;
 | 
				
			||||||
	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
 | 
						err = ext4_mark_iloc_dirty(handle, inode, &iloc);
 | 
				
			||||||
 | 
					 | 
				
			||||||
out_err:
 | 
					out_err:
 | 
				
			||||||
	ext4_std_error(inode->i_sb, err);
 | 
						ext4_std_error(inode->i_sb, err);
 | 
				
			||||||
out:
 | 
					 | 
				
			||||||
	mutex_unlock(&sbi->s_orphan_lock);
 | 
					 | 
				
			||||||
	return err;
 | 
						return err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
out_brelse:
 | 
					out_brelse:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue