Fix get_user_pages() race for write access

There's no real guarantee that handle_mm_fault() will always be able to
break a COW situation - if an update from another thread ends up modifying
the page table some way, handle_mm_fault() may end up requiring us to
re-try the operation.

That's normally fine, but get_user_pages() ended up re-trying it as a read,
and thus a write access could in theory end up losing the dirty bit or be
done on a page that had not been properly COW'ed.

This makes get_user_pages() always retry write accesses as write accesses
by making "follow_page()" require that a writable follow has the dirty bit
set.  That simplifies the code and solves the race: if the COW break fails
for some reason, we'll just loop around and try again.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
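
For readers following along, the sketch below is a self-contained userspace
illustration of the retry pattern the fix relies on.  The types and helpers
(fake_page, lookup_page(), fault_in(), pin_one_page()) are made-up stand-ins,
not the kernel's follow_page() / handle_mm_fault() / get_user_pages()
interfaces, which appear in the diff further down.  The point it demonstrates:
a write lookup only succeeds once the page has been dirtied by a completed
write fault, so a racing or failed COW break just makes the caller loop and
fault again with the original write intent.

/*
 * Illustrative stand-ins only -- NOT the kernel code (see the diff below).
 * Locking, vmas, pfn handling and error paths are all omitted.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_page {
	bool present;
	bool writable;
	bool dirty;		/* set once a write fault has completed */
};

/* Rough analogue of follow_page(): refuse a write lookup unless dirty. */
static struct fake_page *lookup_page(struct fake_page *pg, bool write)
{
	if (!pg->present)
		return NULL;
	if (write && !pg->dirty)
		return NULL;
	return pg;
}

/* Rough analogue of handle_mm_fault(): break COW / fault the page in. */
static void fault_in(struct fake_page *pg, bool write)
{
	pg->present = true;
	if (write) {
		pg->writable = true;
		pg->dirty = true;	/* a write fault leaves the pte dirty */
	}
}

/* Rough analogue of the get_user_pages() inner loop after this commit. */
static struct fake_page *pin_one_page(struct fake_page *pg, bool write)
{
	struct fake_page *page;

	/* If the lookup fails, fault and retry with the same write intent. */
	while (!(page = lookup_page(pg, write)))
		fault_in(pg, write);
	return page;
}

int main(void)
{
	struct fake_page pg = { .present = true };

	pin_one_page(&pg, true);
	printf("dirty after write pin: %d\n", pg.dirty);	/* prints 1 */
	return 0;
}

Before this commit the retry was downgraded to a read lookup
(lookup_write = write && !force); that is exactly the window the diff
below closes.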

parent 8d894c4797
commit 4ceb5db975
1 changed file with 4 additions and 17 deletions

 mm/memory.c | 21 ++++-----------------
diff --git a/mm/memory.c b/mm/memory.c
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -811,18 +811,15 @@ static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
 	pte = *ptep;
 	pte_unmap(ptep);
 	if (pte_present(pte)) {
-		if (write && !pte_write(pte))
+		if (write && !pte_dirty(pte))
 			goto out;
 		if (read && !pte_read(pte))
 			goto out;
 		pfn = pte_pfn(pte);
 		if (pfn_valid(pfn)) {
 			page = pfn_to_page(pfn);
-			if (accessed) {
-				if (write && !pte_dirty(pte) &&!PageDirty(page))
-					set_page_dirty(page);
+			if (accessed)
 				mark_page_accessed(page);
-			}
 			return page;
 		}
 	}
@@ -941,10 +938,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		spin_lock(&mm->page_table_lock);
 		do {
 			struct page *page;
-			int lookup_write = write;
 
 			cond_resched_lock(&mm->page_table_lock);
-			while (!(page = follow_page(mm, start, lookup_write))) {
+			while (!(page = follow_page(mm, start, write))) {
 				/*
 				 * Shortcut for anonymous pages. We don't want
 				 * to force the creation of pages tables for
@@ -952,8 +948,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				 * nobody touched so far. This is important
 				 * for doing a core dump for these mappings.
 				 */
-				if (!lookup_write &&
-				    untouched_anonymous_page(mm,vma,start)) {
+				if (!write && untouched_anonymous_page(mm,vma,start)) {
 					page = ZERO_PAGE(start);
 					break;
 				}
@@ -972,14 +967,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				default:
 					BUG();
 				}
-				/*
-				 * Now that we have performed a write fault
-				 * and surely no longer have a shared page we
-				 * shouldn't write, we shouldn't ignore an
-				 * unwritable page in the page table if
-				 * we are forcing write access.
-				 */
-				lookup_write = write && !force;
 				spin_lock(&mm->page_table_lock);
 			}
 			if (pages) {