	[PATCH] fix get_user_pages bug
Checking pte_dirty instead of pte_write in __follow_page is problematic
for s390, and for copy_one_pte which leaves dirty when clearing write.

So revert __follow_page to check pte_write as before, and make do_wp_page
pass back a special extra VM_FAULT_WRITE bit to say it has done its full
job: once get_user_pages receives this value, it no longer requires
pte_write in __follow_page.

But most callers of handle_mm_fault, in the various architectures, have
switch statements which do not expect this new case. To avoid changing
them all in a hurry, make an inline wrapper function (using the old name)
that masks off the new bit, and use the extended interface with double
underscores.

Yes, we do have a call to do_wp_page from do_swap_page, but no need to
change that: in rare case it's needed, another do_wp_page will follow.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
[ Cleanups by Nick Piggin ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
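For context on the message's first sentence: on s390 the dirty state lives in the storage key rather than in the pte, so pte_dirty() there reports clean unconditionally. A hedged sketch, paraphrasing the s390 include/asm-s390/pgtable.h of this era (exact wording may differ):

/*
 * Paraphrased from the s390 pgtable.h of this period: the dirty bit
 * lives in the storage key, not in the pte, so a pte can never
 * report itself dirty.
 */
static inline int pte_dirty(pte_t pte)
{
	return 0;
}

With the old "if (write && !pte_dirty(pte)) goto out;" test, __follow_page() could therefore never satisfy a write lookup on s390, and get_user_pages() would fault the same address over and over. The converse problem hits copy_one_pte(): it write-protects the pte for COW but leaves it dirty, so a dirty-but-readonly pte would wrongly look as though COW had already been broken.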
Author: Nick Piggin
Commit: f33ea7f404
Parent: 5cb4cc0d82

2 changed files with 40 additions and 13 deletions
include/linux/mm.h
@@ -625,10 +625,16 @@ static inline int page_mapped(struct page *page)
  * Used to decide whether a process gets delivered SIGBUS or
  * just gets major/minor fault counters bumped up.
  */
-#define VM_FAULT_OOM	(-1)
-#define VM_FAULT_SIGBUS	0
-#define VM_FAULT_MINOR	1
-#define VM_FAULT_MAJOR	2
+#define VM_FAULT_OOM	0x00
+#define VM_FAULT_SIGBUS	0x01
+#define VM_FAULT_MINOR	0x02
+#define VM_FAULT_MAJOR	0x03
+
+/*
+ * Special case for get_user_pages.
+ * Must be in a distinct bit from the above VM_FAULT_ flags.
+ */
+#define VM_FAULT_WRITE	0x10
 
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 
@@ -704,7 +710,13 @@ extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsign
 extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
 extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
 extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
-extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
+extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
+
+static inline int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
+{
+	return __handle_mm_fault(mm, vma, address, write_access) & (~VM_FAULT_WRITE);
+}
+
 extern int make_pages_present(unsigned long addr, unsigned long end);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
 void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
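The handle_mm_fault() wrapper just added above is what insulates the architectures: as the commit message notes, their fault handlers switch on the return value and would not expect the new case. A representative sketch of such a caller, modeled on the i386 do_page_fault() of this period (the labels and surrounding code are illustrative fragments): because the wrapper masks off VM_FAULT_WRITE, this switch never sees the new bit and needs no change.

switch (handle_mm_fault(mm, vma, address, write)) {
case VM_FAULT_MINOR:
	tsk->min_flt++;
	break;
case VM_FAULT_MAJOR:
	tsk->maj_flt++;
	break;
case VM_FAULT_SIGBUS:
	goto do_sigbus;
case VM_FAULT_OOM:
	goto out_of_memory;
default:
	BUG();
}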
							
								
								
									
mm/memory.c (31 lines changed)
@@ -811,15 +811,18 @@ static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
 	pte = *ptep;
 	pte_unmap(ptep);
 	if (pte_present(pte)) {
-		if (write && !pte_dirty(pte))
+		if (write && !pte_write(pte))
 			goto out;
 		if (read && !pte_read(pte))
 			goto out;
 		pfn = pte_pfn(pte);
 		if (pfn_valid(pfn)) {
 			page = pfn_to_page(pfn);
-			if (accessed)
+			if (accessed) {
+				if (write && !pte_dirty(pte) && !PageDirty(page))
+					set_page_dirty(page);
 				mark_page_accessed(page);
+			}
 			return page;
 		}
 	}
@@ -941,10 +944,11 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		}
 		spin_lock(&mm->page_table_lock);
 		do {
+			int write_access = write;
 			struct page *page;
 
 			cond_resched_lock(&mm->page_table_lock);
-			while (!(page = follow_page(mm, start, write))) {
+			while (!(page = follow_page(mm, start, write_access))) {
 				/*
 				 * Shortcut for anonymous pages. We don't want
 				 * to force the creation of pages tables for
@@ -957,7 +961,16 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 					break;
 				}
 				spin_unlock(&mm->page_table_lock);
-				switch (handle_mm_fault(mm,vma,start,write)) {
+				switch (__handle_mm_fault(mm, vma, start,
+							write_access)) {
+				case VM_FAULT_WRITE:
+					/*
+					 * do_wp_page has broken COW when
+					 * necessary, even if maybe_mkwrite
+					 * decided not to set pte_write
+					 */
+					write_access = 0;
+					/* FALLTHRU */
 				case VM_FAULT_MINOR:
 					tsk->min_flt++;
 					break;
@@ -1220,6 +1233,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	struct page *old_page, *new_page;
 	unsigned long pfn = pte_pfn(pte);
 	pte_t entry;
+	int ret;
 
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
@@ -1247,7 +1261,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 			lazy_mmu_prot_update(entry);
 			pte_unmap(page_table);
 			spin_unlock(&mm->page_table_lock);
-			return VM_FAULT_MINOR;
+			return VM_FAULT_MINOR|VM_FAULT_WRITE;
 		}
 	}
 	pte_unmap(page_table);
@@ -1274,6 +1288,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
+	ret = VM_FAULT_MINOR;
 	spin_lock(&mm->page_table_lock);
 	page_table = pte_offset_map(pmd, address);
 	if (likely(pte_same(*page_table, pte))) {
@@ -1290,12 +1305,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 
 		/* Free the old page.. */
 		new_page = old_page;
+		ret |= VM_FAULT_WRITE;
 	}
 	pte_unmap(page_table);
 	page_cache_release(new_page);
 	page_cache_release(old_page);
 	spin_unlock(&mm->page_table_lock);
-	return VM_FAULT_MINOR;
+	return ret;
 
 no_new_page:
 	page_cache_release(old_page);
@@ -1987,7 +2003,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	if (write_access) {
 		if (!pte_write(entry))
 			return do_wp_page(mm, vma, address, pte, pmd, entry);
-
 		entry = pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
@@ -2002,7 +2017,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 		unsigned long address, int write_access)
 {
 	pgd_t *pgd;
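Taken together, the round trip works like this: do_wp_page() ORs VM_FAULT_WRITE into an ordinary status code once it has broken COW, get_user_pages() tests the bit through the raw __handle_mm_fault() interface and stops demanding a writable pte, and every unconverted caller has the bit stripped by the inline wrapper. A minimal worked example of the bit arithmetic, using the values defined in the mm.h hunk above:

int write_access = 1;
int ret = VM_FAULT_MINOR | VM_FAULT_WRITE;	/* 0x02 | 0x10 == 0x12 */

/* get_user_pages(): COW has been broken even if the pte was not made
 * writable (maybe_mkwrite declined), so stop requiring pte_write.
 */
if (ret & VM_FAULT_WRITE)
	write_access = 0;

/* Legacy callers go through the handle_mm_fault() wrapper and see
 * only the status code: 0x12 & ~0x10 == 0x02 == VM_FAULT_MINOR.
 */
ret &= ~VM_FAULT_WRITE;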