forked from mirrors/linux
		
	fill_balloon doing memory allocations under balloon_lock
can cause a deadlock when leak_balloon is called from
virtballoon_oom_notify and tries to take the same lock.
To fix, split page allocation and enqueue and do allocations outside the lock.
Here's a detailed analysis of the deadlock by Tetsuo Handa:
In leak_balloon(), mutex_lock(&vb->balloon_lock) is called in order to
serialize against fill_balloon(). But in fill_balloon(),
alloc_page(GFP_HIGHUSER[_MOVABLE] | __GFP_NOMEMALLOC | __GFP_NORETRY) is
called with vb->balloon_lock mutex held. Since GFP_HIGHUSER[_MOVABLE]
implies __GFP_DIRECT_RECLAIM | __GFP_IO | __GFP_FS, even though __GFP_NORETRY
is specified, this allocation attempt might indirectly depend on somebody
else's __GFP_DIRECT_RECLAIM memory allocation. And such indirect
__GFP_DIRECT_RECLAIM memory allocation might call leak_balloon() via
virtballoon_oom_notify() via blocking_notifier_call_chain() callback via
out_of_memory() when it reached __alloc_pages_may_oom() and held oom_lock
mutex. Since vb->balloon_lock mutex is already held by fill_balloon(), it
will cause OOM lockup.
  Thread1                                       Thread2
    fill_balloon()
      takes a balloon_lock
      balloon_page_enqueue()
        alloc_page(GFP_HIGHUSER_MOVABLE)
          direct reclaim (__GFP_FS context)       takes a fs lock
            waits for that fs lock                  alloc_page(GFP_NOFS)
                                                      __alloc_pages_may_oom()
                                                        takes the oom_lock
                                                        out_of_memory()
                                                          blocking_notifier_call_chain()
                                                            leak_balloon()
                                                              tries to take that balloon_lock and deadlocks
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
		
	
			
		
			
				
	
	
		
			177 lines
		
	
	
	
		
			5.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			177 lines
		
	
	
	
		
			5.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * mm/balloon_compaction.c
 | 
						|
 *
 | 
						|
 * Common interface for making balloon pages movable by compaction.
 | 
						|
 *
 | 
						|
 * Copyright (C) 2012, Red Hat, Inc.  Rafael Aquini <aquini@redhat.com>
 | 
						|
 */
 | 
						|
#include <linux/mm.h>
 | 
						|
#include <linux/slab.h>
 | 
						|
#include <linux/export.h>
 | 
						|
#include <linux/balloon_compaction.h>
 | 
						|
 | 
						|
/*
 | 
						|
 * balloon_page_alloc - allocates a new page for insertion into the balloon
 | 
						|
 *			  page list.
 | 
						|
 *
 | 
						|
 * Driver must call it to properly allocate a new enlisted balloon page.
 | 
						|
 * Driver must call balloon_page_enqueue before definitively removing it from
 | 
						|
 * the guest system.  This function returns the page address for the recently
 | 
						|
 * allocated page or NULL in the case we fail to allocate a new page this turn.
 | 
						|
 */
 | 
						|
struct page *balloon_page_alloc(void)
 | 
						|
{
 | 
						|
	struct page *page = alloc_page(balloon_mapping_gfp_mask() |
 | 
						|
				       __GFP_NOMEMALLOC | __GFP_NORETRY);
 | 
						|
	return page;
 | 
						|
}
 | 
						|
EXPORT_SYMBOL_GPL(balloon_page_alloc);
 | 
						|
 | 
						|
/*
 | 
						|
 * balloon_page_enqueue - allocates a new page and inserts it into the balloon
 | 
						|
 *			  page list.
 | 
						|
 * @b_dev_info: balloon device descriptor where we will insert a new page to
 | 
						|
 * @page: new page to enqueue - allocated using balloon_page_alloc.
 | 
						|
 *
 | 
						|
 * Driver must call it to properly enqueue a new allocated balloon page
 | 
						|
 * before definitively removing it from the guest system.
 | 
						|
 * This function returns the page address for the recently enqueued page or
 | 
						|
 * NULL in the case we fail to allocate a new page this turn.
 | 
						|
 */
 | 
						|
void balloon_page_enqueue(struct balloon_dev_info *b_dev_info,
 | 
						|
			  struct page *page)
 | 
						|
{
 | 
						|
	unsigned long flags;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Block others from accessing the 'page' when we get around to
 | 
						|
	 * establishing additional references. We should be the only one
 | 
						|
	 * holding a reference to the 'page' at this point.
 | 
						|
	 */
 | 
						|
	BUG_ON(!trylock_page(page));
 | 
						|
	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
 | 
						|
	balloon_page_insert(b_dev_info, page);
 | 
						|
	__count_vm_event(BALLOON_INFLATE);
 | 
						|
	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
 | 
						|
	unlock_page(page);
 | 
						|
}
 | 
						|
EXPORT_SYMBOL_GPL(balloon_page_enqueue);
 | 
						|
 | 
						|
/*
 | 
						|
 * balloon_page_dequeue - removes a page from balloon's page list and returns
 | 
						|
 *			  the its address to allow the driver release the page.
 | 
						|
 * @b_dev_info: balloon device decriptor where we will grab a page from.
 | 
						|
 *
 | 
						|
 * Driver must call it to properly de-allocate a previous enlisted balloon page
 | 
						|
 * before definetively releasing it back to the guest system.
 | 
						|
 * This function returns the page address for the recently dequeued page or
 | 
						|
 * NULL in the case we find balloon's page list temporarily empty due to
 | 
						|
 * compaction isolated pages.
 | 
						|
 */
 | 
						|
struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
 | 
						|
{
 | 
						|
	struct page *page, *tmp;
 | 
						|
	unsigned long flags;
 | 
						|
	bool dequeued_page;
 | 
						|
 | 
						|
	dequeued_page = false;
 | 
						|
	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
 | 
						|
	list_for_each_entry_safe(page, tmp, &b_dev_info->pages, lru) {
 | 
						|
		/*
 | 
						|
		 * Block others from accessing the 'page' while we get around
 | 
						|
		 * establishing additional references and preparing the 'page'
 | 
						|
		 * to be released by the balloon driver.
 | 
						|
		 */
 | 
						|
		if (trylock_page(page)) {
 | 
						|
#ifdef CONFIG_BALLOON_COMPACTION
 | 
						|
			if (PageIsolated(page)) {
 | 
						|
				/* raced with isolation */
 | 
						|
				unlock_page(page);
 | 
						|
				continue;
 | 
						|
			}
 | 
						|
#endif
 | 
						|
			balloon_page_delete(page);
 | 
						|
			__count_vm_event(BALLOON_DEFLATE);
 | 
						|
			unlock_page(page);
 | 
						|
			dequeued_page = true;
 | 
						|
			break;
 | 
						|
		}
 | 
						|
	}
 | 
						|
	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
 | 
						|
 | 
						|
	if (!dequeued_page) {
 | 
						|
		/*
 | 
						|
		 * If we are unable to dequeue a balloon page because the page
 | 
						|
		 * list is empty and there is no isolated pages, then something
 | 
						|
		 * went out of track and some balloon pages are lost.
 | 
						|
		 * BUG() here, otherwise the balloon driver may get stuck into
 | 
						|
		 * an infinite loop while attempting to release all its pages.
 | 
						|
		 */
 | 
						|
		spin_lock_irqsave(&b_dev_info->pages_lock, flags);
 | 
						|
		if (unlikely(list_empty(&b_dev_info->pages) &&
 | 
						|
			     !b_dev_info->isolated_pages))
 | 
						|
			BUG();
 | 
						|
		spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
 | 
						|
		page = NULL;
 | 
						|
	}
 | 
						|
	return page;
 | 
						|
}
 | 
						|
EXPORT_SYMBOL_GPL(balloon_page_dequeue);
 | 
						|
 | 
						|
#ifdef CONFIG_BALLOON_COMPACTION
 | 
						|
 | 
						|
/*
 * balloon_page_isolate - unlink a balloon page from its device's page list.
 * @page: balloon page to isolate
 * @mode: isolation mode; not consulted by this implementation
 *
 * Under pages_lock, the page is removed from the list it is linked on via
 * page->lru and the device's isolated_pages counter is incremented.
 * Always returns true.
 */
bool balloon_page_isolate(struct page *page, isolate_mode_t mode)
{
	struct balloon_dev_info *b_dev_info = balloon_page_device(page);
	unsigned long irq_flags;

	spin_lock_irqsave(&b_dev_info->pages_lock, irq_flags);
	list_del(&page->lru);
	b_dev_info->isolated_pages++;
	spin_unlock_irqrestore(&b_dev_info->pages_lock, irq_flags);

	return true;
}
 | 
						|
 | 
						|
void balloon_page_putback(struct page *page)
 | 
						|
{
 | 
						|
	struct balloon_dev_info *b_dev_info = balloon_page_device(page);
 | 
						|
	unsigned long flags;
 | 
						|
 | 
						|
	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
 | 
						|
	list_add(&page->lru, &b_dev_info->pages);
 | 
						|
	b_dev_info->isolated_pages--;
 | 
						|
	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* move_to_new_page() counterpart for a ballooned page */
 | 
						|
int balloon_page_migrate(struct address_space *mapping,
 | 
						|
		struct page *newpage, struct page *page,
 | 
						|
		enum migrate_mode mode)
 | 
						|
{
 | 
						|
	struct balloon_dev_info *balloon = balloon_page_device(page);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * We can not easily support the no copy case here so ignore it as it
 | 
						|
	 * is unlikely to be use with ballon pages. See include/linux/hmm.h for
 | 
						|
	 * user of the MIGRATE_SYNC_NO_COPY mode.
 | 
						|
	 */
 | 
						|
	if (mode == MIGRATE_SYNC_NO_COPY)
 | 
						|
		return -EINVAL;
 | 
						|
 | 
						|
	VM_BUG_ON_PAGE(!PageLocked(page), page);
 | 
						|
	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
 | 
						|
 | 
						|
	return balloon->migratepage(balloon, newpage, page, mode);
 | 
						|
}
 | 
						|
 | 
						|
const struct address_space_operations balloon_aops = {
 | 
						|
	.migratepage = balloon_page_migrate,
 | 
						|
	.isolate_page = balloon_page_isolate,
 | 
						|
	.putback_page = balloon_page_putback,
 | 
						|
};
 | 
						|
EXPORT_SYMBOL_GPL(balloon_aops);
 | 
						|
 | 
						|
#endif /* CONFIG_BALLOON_COMPACTION */
 |