mm/huge_memory.c: reorder operations in __split_huge_page_tail()
THP split makes a non-atomic change of tail page flags. This is almost OK
because tail pages are locked and isolated, but it breaks recent changes
to page locking: the non-atomic operation could clear bit PG_waiters. As
a result the concurrent sequence get_page_unless_zero() -> lock_page()
might block forever, especially if this page was truncated later.

The fix is trivial: clone the flags before unfreezing the page reference
counter.

This race has existed since commit 6290602709 ("mm: add PageWaiters
indicating tasks are waiting for a page bit"), while the unsafe unfreeze
itself was added in commit 8df651c705 ("thp: cleanup split_huge_page()").

clear_compound_head() also must be called before unfreezing the page
reference, because after a successful get_page_unless_zero() a put_page()
might follow, and that needs a correct compound_head().

Also replace page_ref_inc()/page_ref_add() with page_ref_unfreeze(), which
is made especially for this purpose and has the semantics of
smp_store_release().

Link: http://lkml.kernel.org/r/151844393341.210639.13162088407980624477.stgit@buzz
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
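To make the race concrete, here is a minimal userspace model of the losing
interleaving. Everything in it is illustrative rather than kernel code: the
bit values and variable names are invented, and the two "CPUs" are serialized
by hand to demonstrate the lost update.

/*
 * Userspace model of the lost PG_waiters wake-up (illustrative only).
 * Build with: cc -std=c11 race.c
 */
#include <stdatomic.h>
#include <stdio.h>

#define PG_locked	(1UL << 0)
#define PG_waiters	(1UL << 7)	/* bit numbers are made up */

int main(void)
{
	atomic_ulong tail_flags = 0;		/* models page_tail->flags */
	atomic_int tail_refcount = 0;		/* models frozen _refcount */
	unsigned long head_flags = PG_locked;	/* flags to clone from head */

	/* CPU 0, pre-fix order: unfreeze the tail refcount first... */
	atomic_store(&tail_refcount, 1);

	/* CPU 1: get_page_unless_zero() now succeeds, and lock_page()
	 * records the waiter with an atomic RMW on the flags word. */
	if (atomic_load(&tail_refcount) != 0)
		atomic_fetch_or(&tail_flags, PG_waiters);

	/* CPU 0: ...then clones the head's flags with a plain overwrite,
	 * wiping out PG_waiters. The unlock side later sees no waiters,
	 * so the sleeper on CPU 1 is never woken. */
	atomic_store(&tail_flags, head_flags);

	printf("PG_waiters survived: %s\n",
	       atomic_load(&tail_flags) & PG_waiters ? "yes" : "no");
	return 0;
}

With the patched ordering (flags cloned while the refcount is still frozen at
zero, unfreeze last), get_page_unless_zero() cannot succeed until the flags
word is stable, so the plain stores can no longer erase PG_waiters.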
commit 605ca5ede7
parent 03f5d58fa4

1 changed file with 15 additions and 21 deletions
mm/huge_memory.c

@@ -2356,26 +2356,13 @@ static void __split_huge_page_tail(struct page *head, int tail,
 	struct page *page_tail = head + tail;
 
 	VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail);
-	VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail);
 
 	/*
-	 * tail_page->_refcount is zero and not changing from under us. But
-	 * get_page_unless_zero() may be running from under us on the
-	 * tail_page. If we used atomic_set() below instead of atomic_inc() or
-	 * atomic_add(), we would then run atomic_set() concurrently with
-	 * get_page_unless_zero(), and atomic_set() is implemented in C not
-	 * using locked ops. spin_unlock on x86 sometime uses locked ops
-	 * because of PPro errata 66, 92, so unless somebody can guarantee
-	 * atomic_set() here would be safe on all archs (and not only on x86),
-	 * it's safer to use atomic_inc()/atomic_add().
+	 * Clone page flags before unfreezing refcount.
+	 *
+	 * After successful get_page_unless_zero() might follow flags change,
+	 * for example lock_page() which sets PG_waiters.
 	 */
-	if (PageAnon(head) && !PageSwapCache(head)) {
-		page_ref_inc(page_tail);
-	} else {
-		/* Additional pin to radix tree */
-		page_ref_add(page_tail, 2);
-	}
-
 	page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	page_tail->flags |= (head->flags &
 			((1L << PG_referenced) |
@@ -2388,14 +2375,21 @@ static void __split_huge_page_tail(struct page *head, int tail,
 			 (1L << PG_unevictable) |
 			 (1L << PG_dirty)));
 
-	/*
-	 * After clearing PageTail the gup refcount can be released.
-	 * Page flags also must be visible before we make the page non-compound.
-	 */
+	/* Page flags must be visible before we make the page non-compound. */
 	smp_wmb();
 
+	/*
+	 * Clear PageTail before unfreezing page refcount.
+	 *
+	 * After successful get_page_unless_zero() might follow put_page()
+	 * which needs correct compound_head().
+	 */
 	clear_compound_head(page_tail);
 
+	/* Finally unfreeze refcount. Additional reference from page cache. */
+	page_ref_unfreeze(page_tail, 1 + (!PageAnon(head) ||
+					  PageSwapCache(head)));
+
 	if (page_is_young(head))
 		set_page_young(page_tail);
 	if (page_is_idle(head))
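The unfreeze count deserves a note: 1 + (!PageAnon(head) || PageSwapCache(head))
evaluates to 1 for anonymous pages outside the swap cache and 2 otherwise (the
extra reference being the page-cache pin), exactly matching the old
page_ref_inc()/page_ref_add(..., 2) branches. As for the release semantics the
changelog leans on, this is roughly what page_ref_unfreeze() looked like around
this kernel version; a sketch from include/linux/page_ref.h with the tracepoint
hook omitted, and the exact body varies across versions:

/*
 * Sketch of page_ref_unfreeze(), include/linux/page_ref.h circa v4.16
 * (tracepoint hook omitted).  The full barrier before the store is what
 * gives the store-release semantics: the flags clone and
 * clear_compound_head() performed while the count was frozen at zero
 * become visible before any other CPU can observe a nonzero refcount.
 */
static inline void page_ref_unfreeze(struct page *page, int count)
{
	VM_BUG_ON_PAGE(page_count(page) != 0, page);
	VM_BUG_ON(count == 0);

	smp_mb();
	atomic_set(&page->_refcount, count);
}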