mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	mm/swap.c: piggyback lru_add_drain_all() calls
This is a very slow operation. Right now POSIX_FADV_DONTNEED is the top user because it has to freeze page references when removing pages from the cache. invalidate_bdev() calls it for the same reason. Both are triggered from userspace, so it's easy to generate a storm. mlock/mlockall no longer call lru_add_drain_all() - I've seen serious slowdowns from that on older kernels. There are some less obvious paths in memory migration/CMA/offlining which shouldn't call it frequently. The worst case requires a non-trivial workload because lru_add_drain_all() skips cpus where vectors are empty. Something must constantly generate a flow of pages for each cpu. Also the cpus must be busy, so that scheduling the per-cpu work items is slower. And the machine must be big enough (64+ cpus in our case). In our case that was a massive series of mlock calls in map-reduce while other tasks wrote logs (and generated flows of new pages in per-cpu vectors). Mlock calls were serialized by the mutex and accumulated latency of up to 10 seconds or more. The kernel does not call lru_add_drain_all() on mlock paths since 4.15, but the same scenario could be triggered by fadvise(POSIX_FADV_DONTNEED) or any other remaining user. There is no reason to do the drain again if somebody else already drained all the per-cpu vectors while we waited for the lock. Piggyback on a drain starting and finishing while we wait for the lock: all pages pending at the time of our entry were drained from the vectors. Callers like POSIX_FADV_DONTNEED retry their operations once after draining per-cpu vectors when pages have unexpected references. Link: http://lkml.kernel.org/r/157019456205.3142.3369423180908482020.stgit@buzz Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Cc: Michal Hocko <mhocko@kernel.org> Cc: Matthew Wilcox <willy@infradead.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									408a60eddd
								
							
						
					
					
						commit
						eef1a429f2
					
				
					 1 changed files with 15 additions and 1 deletions
				
			
		
							
								
								
									
										16
									
								
								mm/swap.c
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								mm/swap.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -713,9 +713,10 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
 | 
			
		|||
 */
 | 
			
		||||
void lru_add_drain_all(void)
 | 
			
		||||
{
 | 
			
		||||
	static seqcount_t seqcount = SEQCNT_ZERO(seqcount);
 | 
			
		||||
	static DEFINE_MUTEX(lock);
 | 
			
		||||
	static struct cpumask has_work;
 | 
			
		||||
	int cpu;
 | 
			
		||||
	int cpu, seq;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Make sure nobody triggers this path before mm_percpu_wq is fully
 | 
			
		||||
| 
						 | 
				
			
			@ -724,7 +725,19 @@ void lru_add_drain_all(void)
 | 
			
		|||
	if (WARN_ON(!mm_percpu_wq))
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	seq = raw_read_seqcount_latch(&seqcount);
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&lock);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Piggyback on drain started and finished while we waited for lock:
 | 
			
		||||
	 * all pages pending at the time of our entry were drained from vectors.
 | 
			
		||||
	 */
 | 
			
		||||
	if (__read_seqcount_retry(&seqcount, seq))
 | 
			
		||||
		goto done;
 | 
			
		||||
 | 
			
		||||
	raw_write_seqcount_latch(&seqcount);
 | 
			
		||||
 | 
			
		||||
	cpumask_clear(&has_work);
 | 
			
		||||
 | 
			
		||||
	for_each_online_cpu(cpu) {
 | 
			
		||||
| 
						 | 
				
			
			@ -745,6 +758,7 @@ void lru_add_drain_all(void)
 | 
			
		|||
	for_each_cpu(cpu, &has_work)
 | 
			
		||||
		flush_work(&per_cpu(lru_add_drain_work, cpu));
 | 
			
		||||
 | 
			
		||||
done:
 | 
			
		||||
	mutex_unlock(&lock);
 | 
			
		||||
}
 | 
			
		||||
#else
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue