Mirror of https://github.com/torvalds/linux.git, synced 2025-11-04 02:30:34 +02:00
x86/mm: Fix pgd_lock deadlock

It's forbidden to take the page_table_lock with irqs disabled: if there is
contention, the IPIs (for TLB flushes) sent while the page_table_lock is held
will never run, leading to a deadlock. Nobody takes the pgd_lock from irq
context, so the _irqsave can be removed.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: <stable@kernel.org>
LKML-Reference: <201102162345.p1GNjMjm021738@imap1.linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
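The deadlock can be sketched as a two-CPU interleaving (an illustrative reconstruction, not text from the patch; flush_tlb_others() stands in for any code that sends TLB-shootdown IPIs while holding a page-table lock, and the right-hand column is the pre-patch vmalloc_sync_all()/sync_global_pgds() path shown in the hunks below):

	/*
	 * CPU A                                  CPU B
	 * -----                                  -----
	 * spin_lock(&mm->page_table_lock);       spin_lock_irqsave(&pgd_lock, flags);
	 * flush_tlb_others(...);                 spin_lock(&mm->page_table_lock);
	 *   sends an IPI to CPU B and spins        spins: the lock is held by CPU A,
	 *   until CPU B acknowledges it            and with IRQs disabled the IPI
	 *                                          from CPU A is never serviced
	 *
	 * CPU A never gets its acknowledgement, so it never releases
	 * mm->page_table_lock; CPU B never gets the lock and never re-enables
	 * interrupts: both CPUs spin forever.  Taking pgd_lock with a plain
	 * spin_lock() keeps interrupts enabled on CPU B, so the IPI is handled
	 * and both CPUs make progress.
	 */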
parent f86268549f
commit a79e53d856

5 changed files with 22 additions and 30 deletions
@@ -229,15 +229,14 @@ void vmalloc_sync_all(void)
 	for (address = VMALLOC_START & PMD_MASK;
 	     address >= TASK_SIZE && address < FIXADDR_TOP;
 	     address += PMD_SIZE) {
-
-		unsigned long flags;
 		struct page *page;
 
-		spin_lock_irqsave(&pgd_lock, flags);
+		spin_lock(&pgd_lock);
 		list_for_each_entry(page, &pgd_list, lru) {
 			spinlock_t *pgt_lock;
 			pmd_t *ret;
 
+			/* the pgt_lock only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 
 			spin_lock(pgt_lock);
@@ -247,7 +246,7 @@ void vmalloc_sync_all(void)
 			if (!ret)
 				break;
 		}
-		spin_unlock_irqrestore(&pgd_lock, flags);
+		spin_unlock(&pgd_lock);
 	}
 }
 
@@ -105,18 +105,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 
 	for (address = start; address <= end; address += PGDIR_SIZE) {
 		const pgd_t *pgd_ref = pgd_offset_k(address);
-		unsigned long flags;
 		struct page *page;
 
 		if (pgd_none(*pgd_ref))
 			continue;
 
-		spin_lock_irqsave(&pgd_lock, flags);
+		spin_lock(&pgd_lock);
 		list_for_each_entry(page, &pgd_list, lru) {
 			pgd_t *pgd;
 			spinlock_t *pgt_lock;
 
 			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			/* the pgt_lock only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 			spin_lock(pgt_lock);
 
@@ -128,7 +128,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 
 			spin_unlock(pgt_lock);
 		}
-		spin_unlock_irqrestore(&pgd_lock, flags);
+		spin_unlock(&pgd_lock);
 	}
 }
 
@@ -57,12 +57,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM];
 
 void update_page_count(int level, unsigned long pages)
 {
-	unsigned long flags;
-
 	/* Protect against CPA */
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	direct_pages_count[level] += pages;
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 }
 
 static void split_page_count(int level)
@@ -394,7 +392,7 @@ static int
 try_preserve_large_page(pte_t *kpte, unsigned long address,
 			struct cpa_data *cpa)
 {
-	unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
+	unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
 	pte_t new_pte, old_pte, *tmp;
 	pgprot_t old_prot, new_prot, req_prot;
 	int i, do_split = 1;
@@ -403,7 +401,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	if (cpa->force_split)
 		return 1;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up already:
@@ -498,14 +496,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	}
 
 out_unlock:
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 
 	return do_split;
 }
 
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
-	unsigned long flags, pfn, pfninc = 1;
+	unsigned long pfn, pfninc = 1;
 	unsigned int i, level;
 	pte_t *pbase, *tmp;
 	pgprot_t ref_prot;
@@ -519,7 +517,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	if (!base)
 		return -ENOMEM;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
@@ -591,7 +589,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	 */
 	if (base)
 		__free_page(base);
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 
 	return 0;
 }
@@ -121,14 +121,12 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
 
 static void pgd_dtor(pgd_t *pgd)
 {
-	unsigned long flags; /* can be called from interrupt context */
-
 	if (SHARED_KERNEL_PMD)
 		return;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	pgd_list_del(pgd);
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 }
 
 /*
@@ -260,7 +258,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 	pmd_t *pmds[PREALLOCATED_PMDS];
-	unsigned long flags;
 
 	pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
 
@@ -280,12 +277,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	 * respect to anything walking the pgd_list, so that they
 	 * never see a partially populated pgd.
 	 */
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 
 	pgd_ctor(mm, pgd);
 	pgd_prepopulate_pmd(mm, pgd, pmds);
 
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 
 	return pgd;
 
@@ -986,10 +986,9 @@ static void xen_pgd_pin(struct mm_struct *mm)
  */
 void xen_mm_pin_all(void)
 {
-	unsigned long flags;
 	struct page *page;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (!PagePinned(page)) {
@@ -998,7 +997,7 @@ void xen_mm_pin_all(void)
 		}
 	}
 
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 }
 
 /*
@@ -1099,10 +1098,9 @@ static void xen_pgd_unpin(struct mm_struct *mm)
  */
 void xen_mm_unpin_all(void)
 {
-	unsigned long flags;
 	struct page *page;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (PageSavePinned(page)) {
@@ -1112,7 +1110,7 @@ void xen_mm_unpin_all(void)
 		}
 	}
 
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 }
 
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
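The pattern common to every hunk above, shown schematically (this summary is not part of the patch itself); it is safe precisely because pgd_lock is never taken from interrupt context:

	/* Before: spinning here with IRQs off can block the TLB-flush IPI. */
	unsigned long flags;
	spin_lock_irqsave(&pgd_lock, flags);
	/* ... walk pgd_list ... */
	spin_unlock_irqrestore(&pgd_lock, flags);

	/* After: IRQs stay enabled while spinning, so IPIs are still serviced. */
	spin_lock(&pgd_lock);
	/* ... walk pgd_list ... */
	spin_unlock(&pgd_lock);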