mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	fix the broken lockdep logic in __sb_start_write()
1. wait_event(frozen < level) without rwsem_acquire_read() is just wrong from lockdep perspective. If we are going to deadlock because the caller is buggy, lockdep can't detect this problem. 2. __sb_start_write() can race with thaw_super() + freeze_super(), and after "goto retry" the 2nd acquire_freeze_lock() is wrong. 3. The "tell lockdep we are doing trylock" hack doesn't look nice. I think this is correct, but this logic should be more explicit. Yes, the recursive read_lock() is fine if we hold the lock on a higher level. But we do not need to fool lockdep. If we can not deadlock in this case then try-lock must not fail and we can use use wait == F throughout this code. Note: as Dave Chinner explains, the "trylock" hack and the fat comment can be probably removed. But this needs a separate change and it will be trivial: just kill __sb_start_write() and rename do_sb_start_write() back to __sb_start_write(). Signed-off-by: Oleg Nesterov <oleg@redhat.com> Reviewed-by: Jan Kara <jack@suse.com>
This commit is contained in:
		
							parent
							
								
									bee9182d95
								
							
						
					
					
						commit
						f4b554af99
					
				
					 1 changed files with 40 additions and 33 deletions
				
			
		
							
								
								
									
										73
									
								
								fs/super.c
									
									
									
									
									
								
							
							
						
						
									
										73
									
								
								fs/super.c
									
									
									
									
									
								
							| 
						 | 
					@ -1158,38 +1158,11 @@ void __sb_end_write(struct super_block *sb, int level)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL(__sb_end_write);
 | 
					EXPORT_SYMBOL(__sb_end_write);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_LOCKDEP
 | 
					static int do_sb_start_write(struct super_block *sb, int level, bool wait,
 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
 * We want lockdep to tell us about possible deadlocks with freezing but
 | 
					 | 
				
			||||||
 * it's it bit tricky to properly instrument it. Getting a freeze protection
 | 
					 | 
				
			||||||
 * works as getting a read lock but there are subtle problems. XFS for example
 | 
					 | 
				
			||||||
 * gets freeze protection on internal level twice in some cases, which is OK
 | 
					 | 
				
			||||||
 * only because we already hold a freeze protection also on higher level. Due
 | 
					 | 
				
			||||||
 * to these cases we have to tell lockdep we are doing trylock when we
 | 
					 | 
				
			||||||
 * already hold a freeze protection for a higher freeze level.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
 | 
					 | 
				
			||||||
				unsigned long ip)
 | 
									unsigned long ip)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int i;
 | 
						if (wait)
 | 
				
			||||||
 | 
							rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, 0, ip);
 | 
				
			||||||
	if (!trylock) {
 | 
					 | 
				
			||||||
		for (i = 0; i < level - 1; i++)
 | 
					 | 
				
			||||||
			if (lock_is_held(&sb->s_writers.lock_map[i])) {
 | 
					 | 
				
			||||||
				trylock = true;
 | 
					 | 
				
			||||||
				break;
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
 * This is an internal function, please use sb_start_{write,pagefault,intwrite}
 | 
					 | 
				
			||||||
 * instead.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
int __sb_start_write(struct super_block *sb, int level, bool wait)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
retry:
 | 
					retry:
 | 
				
			||||||
	if (unlikely(sb->s_writers.frozen >= level)) {
 | 
						if (unlikely(sb->s_writers.frozen >= level)) {
 | 
				
			||||||
		if (!wait)
 | 
							if (!wait)
 | 
				
			||||||
| 
						 | 
					@ -1198,9 +1171,6 @@ int __sb_start_write(struct super_block *sb, int level, bool wait)
 | 
				
			||||||
			   sb->s_writers.frozen < level);
 | 
								   sb->s_writers.frozen < level);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_LOCKDEP
 | 
					 | 
				
			||||||
	acquire_freeze_lock(sb, level, !wait, _RET_IP_);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
	percpu_counter_inc(&sb->s_writers.counter[level-1]);
 | 
						percpu_counter_inc(&sb->s_writers.counter[level-1]);
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Make sure counter is updated before we check for frozen.
 | 
						 * Make sure counter is updated before we check for frozen.
 | 
				
			||||||
| 
						 | 
					@ -1211,8 +1181,45 @@ int __sb_start_write(struct super_block *sb, int level, bool wait)
 | 
				
			||||||
		__sb_end_write(sb, level);
 | 
							__sb_end_write(sb, level);
 | 
				
			||||||
		goto retry;
 | 
							goto retry;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!wait)
 | 
				
			||||||
 | 
							rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, 1, ip);
 | 
				
			||||||
	return 1;
 | 
						return 1;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * This is an internal function, please use sb_start_{write,pagefault,intwrite}
 | 
				
			||||||
 | 
					 * instead.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					int __sb_start_write(struct super_block *sb, int level, bool wait)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						bool force_trylock = false;
 | 
				
			||||||
 | 
						int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef CONFIG_LOCKDEP
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * We want lockdep to tell us about possible deadlocks with freezing
 | 
				
			||||||
 | 
						 * but it's it bit tricky to properly instrument it. Getting a freeze
 | 
				
			||||||
 | 
						 * protection works as getting a read lock but there are subtle
 | 
				
			||||||
 | 
						 * problems. XFS for example gets freeze protection on internal level
 | 
				
			||||||
 | 
						 * twice in some cases, which is OK only because we already hold a
 | 
				
			||||||
 | 
						 * freeze protection also on higher level. Due to these cases we have
 | 
				
			||||||
 | 
						 * to use wait == F (trylock mode) which must not fail.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (wait) {
 | 
				
			||||||
 | 
							int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							for (i = 0; i < level - 1; i++)
 | 
				
			||||||
 | 
								if (lock_is_held(&sb->s_writers.lock_map[i])) {
 | 
				
			||||||
 | 
									force_trylock = true;
 | 
				
			||||||
 | 
									break;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
						ret = do_sb_start_write(sb, level, wait && !force_trylock, _RET_IP_);
 | 
				
			||||||
 | 
						WARN_ON(force_trylock & !ret);
 | 
				
			||||||
 | 
						return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL(__sb_start_write);
 | 
					EXPORT_SYMBOL(__sb_start_write);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue