mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	mm/page_alloc: fix potential deadlock on zonelist_update_seq seqlock
syzbot is reporting circular locking dependency which involves
zonelist_update_seq seqlock [1], for this lock is checked by memory
allocation requests which do not need to be retried.
One deadlock scenario is kmalloc(GFP_ATOMIC) from an interrupt handler.
  CPU0
  ----
  __build_all_zonelists() {
    write_seqlock(&zonelist_update_seq); // makes zonelist_update_seq.seqcount odd
    // e.g. timer interrupt handler runs at this moment
      some_timer_func() {
        kmalloc(GFP_ATOMIC) {
          __alloc_pages_slowpath() {
            read_seqbegin(&zonelist_update_seq) {
              // spins forever because zonelist_update_seq.seqcount is odd
            }
          }
        }
      }
    // e.g. timer interrupt handler finishes
    write_sequnlock(&zonelist_update_seq); // makes zonelist_update_seq.seqcount even
  }
This deadlock scenario can be easily eliminated by not calling
read_seqbegin(&zonelist_update_seq) from !__GFP_DIRECT_RECLAIM allocation
requests, for retry is applicable to only __GFP_DIRECT_RECLAIM allocation
requests.  But Michal Hocko does not know whether we should go with this
approach.
Another deadlock scenario which syzbot is reporting is a race between
kmalloc(GFP_ATOMIC) from tty_insert_flip_string_and_push_buffer() with
port->lock held and printk() from __build_all_zonelists() with
zonelist_update_seq held.
  CPU0                                   CPU1
  ----                                   ----
  pty_write() {
    tty_insert_flip_string_and_push_buffer() {
                                         __build_all_zonelists() {
                                           write_seqlock(&zonelist_update_seq);
                                           build_zonelists() {
                                             printk() {
                                               vprintk() {
                                                 vprintk_default() {
                                                   vprintk_emit() {
                                                     console_unlock() {
                                                       console_flush_all() {
                                                         console_emit_next_record() {
                                                           con->write() = serial8250_console_write() {
      spin_lock_irqsave(&port->lock, flags);
      tty_insert_flip_string() {
        tty_insert_flip_string_fixed_flag() {
          __tty_buffer_request_room() {
            tty_buffer_alloc() {
              kmalloc(GFP_ATOMIC | __GFP_NOWARN) {
                __alloc_pages_slowpath() {
                  zonelist_iter_begin() {
                    read_seqbegin(&zonelist_update_seq); // spins forever because zonelist_update_seq.seqcount is odd
                                                             spin_lock_irqsave(&port->lock, flags); // spins forever because port->lock is held
                    }
                  }
                }
              }
            }
          }
        }
      }
      spin_unlock_irqrestore(&port->lock, flags);
                                                             // message is printed to console
                                                             spin_unlock_irqrestore(&port->lock, flags);
                                                           }
                                                         }
                                                       }
                                                     }
                                                   }
                                                 }
                                               }
                                             }
                                           }
                                           write_sequnlock(&zonelist_update_seq);
                                         }
    }
  }
This deadlock scenario can be eliminated by
  preventing interrupt context from calling kmalloc(GFP_ATOMIC)
and
  preventing printk() from calling console_flush_all()
while zonelist_update_seq.seqcount is odd.
Since Petr Mladek thinks that __build_all_zonelists() can become a
candidate for deferring printk() [2], let's address this problem by
  disabling local interrupts in order to avoid kmalloc(GFP_ATOMIC)
and
  disabling synchronous printk() in order to avoid console_flush_all()
.
As a side effect of minimizing duration of zonelist_update_seq.seqcount
being odd by disabling synchronous printk(), latency at
read_seqbegin(&zonelist_update_seq) for both !__GFP_DIRECT_RECLAIM and
__GFP_DIRECT_RECLAIM allocation requests will be reduced.  Although, from
lockdep perspective, not calling read_seqbegin(&zonelist_update_seq) (i.e.
do not record unnecessary locking dependency) from interrupt context is
still preferable, even if we don't allow calling kmalloc(GFP_ATOMIC)
inside
write_seqlock(&zonelist_update_seq)/write_sequnlock(&zonelist_update_seq)
section...
Link: https://lkml.kernel.org/r/8796b95c-3da3-5885-fddd-6ef55f30e4d3@I-love.SAKURA.ne.jp
Fixes: 3d36424b3b ("mm/page_alloc: fix race condition between build_all_zonelists and page allocation")
Link: https://lkml.kernel.org/r/ZCrs+1cDqPWTDFNM@alley [2]
Reported-by: syzbot <syzbot+223c7461c58c58a4cb10@syzkaller.appspotmail.com>
  Link: https://syzkaller.appspot.com/bug?extid=223c7461c58c58a4cb10 [1]
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Petr Mladek <pmladek@suse.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Patrick Daly <quic_pdaly@quicinc.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
			
			
This commit is contained in:
		
							parent
							
								
									659c0ce1cb
								
							
						
					
					
						commit
						1007843a91
					
				
					 1 changed files with 16 additions and 0 deletions
				
			
		| 
						 | 
					@ -6632,7 +6632,21 @@ static void __build_all_zonelists(void *data)
 | 
				
			||||||
	int nid;
 | 
						int nid;
 | 
				
			||||||
	int __maybe_unused cpu;
 | 
						int __maybe_unused cpu;
 | 
				
			||||||
	pg_data_t *self = data;
 | 
						pg_data_t *self = data;
 | 
				
			||||||
 | 
						unsigned long flags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Explicitly disable this CPU's interrupts before taking seqlock
 | 
				
			||||||
 | 
						 * to prevent any IRQ handler from calling into the page allocator
 | 
				
			||||||
 | 
						 * (e.g. GFP_ATOMIC) that could hit zonelist_iter_begin and livelock.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						local_irq_save(flags);
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Explicitly disable this CPU's synchronous printk() before taking
 | 
				
			||||||
 | 
						 * seqlock to prevent any printk() from trying to hold port->lock, for
 | 
				
			||||||
 | 
						 * tty_insert_flip_string_and_push_buffer() on other CPU might be
 | 
				
			||||||
 | 
						 * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						printk_deferred_enter();
 | 
				
			||||||
	write_seqlock(&zonelist_update_seq);
 | 
						write_seqlock(&zonelist_update_seq);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_NUMA
 | 
					#ifdef CONFIG_NUMA
 | 
				
			||||||
| 
						 | 
					@ -6671,6 +6685,8 @@ static void __build_all_zonelists(void *data)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	write_sequnlock(&zonelist_update_seq);
 | 
						write_sequnlock(&zonelist_update_seq);
 | 
				
			||||||
 | 
						printk_deferred_exit();
 | 
				
			||||||
 | 
						local_irq_restore(flags);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static noinline void __init
 | 
					static noinline void __init
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue