mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	thp: update futex compound knowledge
Futex code is smarter than most other gup_fast O_DIRECT code and knows about the compound internals. However now doing a put_page(head_page) will not release the pin on the tail page taken by gup-fast, leading to all sorts of refcounting bugchecks. Getting a stable head_page is a little tricky. page_head = page is there because if this is not a tail page it's also the page_head. Only in case this is a tail page, compound_head is called, otherwise it's guaranteed unnecessary. And if it's a tail page, compound_head has to run atomically inside the irq-disabled section around __get_user_pages_fast before returning. Otherwise ->first_page won't be a stable pointer. Disabling irqs before __get_user_pages_fast and re-enabling them after running compound_head is needed because if __get_user_pages_fast returns 1, it means the huge pmd is established and cannot go away from under us. pmdp_splitting_flush_notify in __split_huge_page_splitting will have to wait for local_irq_enable before the IPI delivery can return. This means __split_huge_page_refcount can't be running from under us, and in turn when we run compound_head(page) we're not reading a dangling pointer from tailpage->first_page. Then, once we have a stable head page, we are always safe to call compound_lock, and after taking the compound lock on the head page we can finally re-check if the page returned by gup-fast is still a tail page, in which case we're set and we didn't need to split the hugepage in order to take a futex on it. Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Acked-by: Mel Gorman <mel@csn.ul.ie> Acked-by: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									a95a82e96c
								
							
						
					
					
						commit
						a5b338f2b0
					
				
					 1 changed files with 45 additions and 10 deletions
				
			
		| 
						 | 
					@ -233,7 +233,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long address = (unsigned long)uaddr;
 | 
						unsigned long address = (unsigned long)uaddr;
 | 
				
			||||||
	struct mm_struct *mm = current->mm;
 | 
						struct mm_struct *mm = current->mm;
 | 
				
			||||||
	struct page *page;
 | 
						struct page *page, *page_head;
 | 
				
			||||||
	int err;
 | 
						int err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
| 
						 | 
					@ -265,11 +265,46 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
 | 
				
			||||||
	if (err < 0)
 | 
						if (err < 0)
 | 
				
			||||||
		return err;
 | 
							return err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	page = compound_head(page);
 | 
					#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 | 
				
			||||||
	lock_page(page);
 | 
						page_head = page;
 | 
				
			||||||
	if (!page->mapping) {
 | 
						if (unlikely(PageTail(page))) {
 | 
				
			||||||
		unlock_page(page);
 | 
					 | 
				
			||||||
		put_page(page);
 | 
							put_page(page);
 | 
				
			||||||
 | 
							/* serialize against __split_huge_page_splitting() */
 | 
				
			||||||
 | 
							local_irq_disable();
 | 
				
			||||||
 | 
							if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
 | 
				
			||||||
 | 
								page_head = compound_head(page);
 | 
				
			||||||
 | 
								/*
 | 
				
			||||||
 | 
								 * page_head is valid pointer but we must pin
 | 
				
			||||||
 | 
								 * it before taking the PG_lock and/or
 | 
				
			||||||
 | 
								 * PG_compound_lock. The moment we re-enable
 | 
				
			||||||
 | 
								 * irqs __split_huge_page_splitting() can
 | 
				
			||||||
 | 
								 * return and the head page can be freed from
 | 
				
			||||||
 | 
								 * under us. We can't take the PG_lock and/or
 | 
				
			||||||
 | 
								 * PG_compound_lock on a page that could be
 | 
				
			||||||
 | 
								 * freed from under us.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								if (page != page_head) {
 | 
				
			||||||
 | 
									get_page(page_head);
 | 
				
			||||||
 | 
									put_page(page);
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								local_irq_enable();
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								local_irq_enable();
 | 
				
			||||||
 | 
								goto again;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						page_head = compound_head(page);
 | 
				
			||||||
 | 
						if (page != page_head) {
 | 
				
			||||||
 | 
							get_page(page_head);
 | 
				
			||||||
 | 
							put_page(page);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						lock_page(page_head);
 | 
				
			||||||
 | 
						if (!page_head->mapping) {
 | 
				
			||||||
 | 
							unlock_page(page_head);
 | 
				
			||||||
 | 
							put_page(page_head);
 | 
				
			||||||
		goto again;
 | 
							goto again;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -280,20 +315,20 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
 | 
				
			||||||
	 * it's a read-only handle, it's expected that futexes attach to
 | 
						 * it's a read-only handle, it's expected that futexes attach to
 | 
				
			||||||
	 * the object not the particular process.
 | 
						 * the object not the particular process.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (PageAnon(page)) {
 | 
						if (PageAnon(page_head)) {
 | 
				
			||||||
		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
 | 
							key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
 | 
				
			||||||
		key->private.mm = mm;
 | 
							key->private.mm = mm;
 | 
				
			||||||
		key->private.address = address;
 | 
							key->private.address = address;
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
 | 
							key->both.offset |= FUT_OFF_INODE; /* inode-based key */
 | 
				
			||||||
		key->shared.inode = page->mapping->host;
 | 
							key->shared.inode = page_head->mapping->host;
 | 
				
			||||||
		key->shared.pgoff = page->index;
 | 
							key->shared.pgoff = page_head->index;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	get_futex_key_refs(key);
 | 
						get_futex_key_refs(key);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	unlock_page(page);
 | 
						unlock_page(page_head);
 | 
				
			||||||
	put_page(page);
 | 
						put_page(page_head);
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue