mm/migrate: support un-addressable ZONE_DEVICE page in migration

Allow to unmap and restore special swap entry of un-addressable
ZONE_DEVICE memory.

Link: http://lkml.kernel.org/r/20170817000548.32038-17-jglisse@redhat.com
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Nellans <dnellans@nvidia.com>
Cc: Evgeny Baskakov <ebaskakov@nvidia.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Mark Hairgrove <mhairgrove@nvidia.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Sherry Cheung <SCheung@nvidia.com>
Cc: Subhash Gutti <sgutti@nvidia.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Bob Liu <liubo95@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 8c3328f1f3
commit a5430dda8a

4 changed files with 166 additions and 31 deletions
include/linux/migrate.h

@@ -159,12 +159,18 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 
 #ifdef CONFIG_MIGRATION
 
+/*
+ * Watch out for PAE architecture, which has an unsigned long, and might not
+ * have enough bits to store all physical address and flags. So far we have
+ * enough room for all our flags.
+ */
 #define MIGRATE_PFN_VALID	(1UL << 0)
 #define MIGRATE_PFN_MIGRATE	(1UL << 1)
 #define MIGRATE_PFN_LOCKED	(1UL << 2)
 #define MIGRATE_PFN_WRITE	(1UL << 3)
-#define MIGRATE_PFN_ERROR	(1UL << 4)
-#define MIGRATE_PFN_SHIFT	5
+#define MIGRATE_PFN_DEVICE	(1UL << 4)
+#define MIGRATE_PFN_ERROR	(1UL << 5)
+#define MIGRATE_PFN_SHIFT	6
 
 static inline struct page *migrate_pfn_to_page(unsigned long mpfn)
 {
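The raised MIGRATE_PFN_SHIFT works because the pfn is packed into the bits above the flag bits, which is also why the new comment warns about PAE's narrower unsigned long. For orientation, a minimal sketch of the encode/decode pair these flags feed; it matches the helpers this header already carries in this kernel generation, but is reproduced from memory, so treat it as illustrative rather than authoritative:

	static inline unsigned long migrate_pfn(unsigned long pfn)
	{
		/* pfn lives above the flag bits; mark the entry valid */
		return (pfn << MIGRATE_PFN_SHIFT) | MIGRATE_PFN_VALID;
	}

	static inline struct page *migrate_pfn_to_page(unsigned long mpfn)
	{
		if (!(mpfn & MIGRATE_PFN_VALID))
			return NULL;
		return pfn_to_page(mpfn >> MIGRATE_PFN_SHIFT);
	}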
							
								
								
									
mm/migrate.c

@@ -36,6 +36,7 @@
 #include <linux/hugetlb.h>
 #include <linux/hugetlb_cgroup.h>
 #include <linux/gfp.h>
+#include <linux/memremap.h>
 #include <linux/balloon_compaction.h>
 #include <linux/mmu_notifier.h>
 #include <linux/page_idle.h>
@@ -237,7 +238,13 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 		if (is_write_migration_entry(entry))
 			pte = maybe_mkwrite(pte, vma);
 
-		flush_dcache_page(new);
+		if (unlikely(is_zone_device_page(new)) &&
+		    is_device_private_page(new)) {
+			entry = make_device_private_entry(new, pte_write(pte));
+			pte = swp_entry_to_pte(entry);
+		} else
+			flush_dcache_page(new);
+
 #ifdef CONFIG_HUGETLB_PAGE
 		if (PageHuge(new)) {
 			pte = pte_mkhuge(pte);
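The device-private entry that remove_migration_pte() now restores is consumed at fault time: the memory has no CPU mapping, so the fault must be handed back to the device driver. Roughly how the do_swap_page() side of this same patch series handles a non-present pte (abridged from memory; device_private_entry_fault() is the 4.14-era HMM API and may differ in later kernels):

	entry = pte_to_swp_entry(vmf->orig_pte);
	if (is_migration_entry(entry)) {
		/* wait for the migration entry installed by try_to_unmap */
		migration_entry_wait(vma->vm_mm, vmf->pmd, vmf->address);
	} else if (is_device_private_entry(entry)) {
		/*
		 * Un-addressable device memory: the pgmap fault callback
		 * must migrate the page back to CPU-accessible memory.
		 */
		ret = device_private_entry_fault(vma, vmf->address, entry,
						 vmf->flags, vmf->pmd);
	}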
@@ -2205,17 +2212,40 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 		pte = *ptep;
 		pfn = pte_pfn(pte);
 
-		if (!pte_present(pte)) {
+		if (pte_none(pte)) {
 			mpfn = pfn = 0;
 			goto next;
 		}
 
+		if (!pte_present(pte)) {
+			mpfn = pfn = 0;
+
+			/*
+			 * Only care about unaddressable device page special
+			 * page table entry. Other special swap entries are not
+			 * migratable, and we ignore regular swapped page.
+			 */
+			entry = pte_to_swp_entry(pte);
+			if (!is_device_private_entry(entry))
+				goto next;
+
+			page = device_private_entry_to_page(entry);
+			mpfn = migrate_pfn(page_to_pfn(page)) |
+				MIGRATE_PFN_DEVICE | MIGRATE_PFN_MIGRATE;
+			if (is_write_device_private_entry(entry))
+				mpfn |= MIGRATE_PFN_WRITE;
+		} else {
+			page = vm_normal_page(migrate->vma, addr, pte);
+			mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
+			mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
+		}
+
 		/* FIXME support THP */
-		page = vm_normal_page(migrate->vma, addr, pte);
 		if (!page || !page->mapping || PageTransCompound(page)) {
 			mpfn = pfn = 0;
 			goto next;
 		}
+		pfn = page_to_pfn(page);
 
 		/*
 		 * By getting a reference on the page we pin it and that blocks
@@ -2228,8 +2258,6 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 		 */
 		get_page(page);
 		migrate->cpages++;
-		mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
-		mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
 
 		/*
 		 * Optimize for the common case where page is only mapped once
@@ -2256,10 +2284,13 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 			 */
 			page_remove_rmap(page, false);
 			put_page(page);
-			unmapped++;
+
+			if (pte_present(pte))
+				unmapped++;
 		}
 
 next:
+		migrate->dst[migrate->npages] = 0;
 		migrate->src[migrate->npages++] = mpfn;
 	}
 	arch_leave_lazy_mmu_mode();
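The src[]/dst[] arrays being filled here belong to the migrate_vma() helper API of this kernel generation. A hypothetical driver-side caller, to show how collection, unmap and restore fit together (the my_dev_* names are invented for illustration; the migrate_vma() signature is the 4.14-era one, taking a migrate_vma_ops with alloc_and_copy and finalize_and_map callbacks, and the caller must hold mmap_sem for read):

	static const struct migrate_vma_ops my_dev_migrate_ops = {
		.alloc_and_copy		= my_dev_alloc_and_copy,
		.finalize_and_map	= my_dev_finalize_and_map,
	};

	static int my_dev_migrate_range(struct vm_area_struct *vma,
					unsigned long start, unsigned long end)
	{
		unsigned long npages = (end - start) >> PAGE_SHIFT;
		unsigned long *src, *dst;
		int ret = -ENOMEM;

		/* one src/dst slot per page in the range */
		src = kcalloc(npages, sizeof(*src), GFP_KERNEL);
		dst = kcalloc(npages, sizeof(*dst), GFP_KERNEL);
		if (!src || !dst)
			goto out;

		/* collect, unmap, copy via the callbacks, then remap */
		ret = migrate_vma(&my_dev_migrate_ops, vma, start, end,
				  src, dst, NULL);
	out:
		kfree(src);
		kfree(dst);
		return ret;
	}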
@@ -2329,6 +2360,28 @@ static bool migrate_vma_check_page(struct page *page)
 	if (PageCompound(page))
 		return false;
 
+	/* Page from ZONE_DEVICE have one extra reference */
+	if (is_zone_device_page(page)) {
+		/*
+		 * Private page can never be pin as they have no valid pte and
+		 * GUP will fail for those. Yet if there is a pending migration
+		 * a thread might try to wait on the pte migration entry and
+		 * will bump the page reference count. Sadly there is no way to
+		 * differentiate a regular pin from migration wait. Hence to
+		 * avoid 2 racing thread trying to migrate back to CPU to enter
+		 * infinite loop (one stoping migration because the other is
+		 * waiting on pte migration entry). We always return true here.
+		 *
+		 * FIXME proper solution is to rework migration_entry_wait() so
+		 * it does not need to take a reference on page.
+		 */
+		if (is_device_private_page(page))
+			return true;
+
+		/* Other ZONE_DEVICE memory type are not supported */
+		return false;
+	}
+
 	if ((page_count(page) - extra) > page_mapcount(page))
 		return false;
 
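For intuition about the non-device path above: an LRU page is considered pinned when it holds more references than its mappings plus the references the caller already accounts for. A hypothetical restatement of that check as a standalone helper (not in the patch, for intuition only):

	/* Hypothetical helper restating the LRU-page pin check. */
	static bool page_is_pinned(struct page *page, int extra)
	{
		/* each pte mapping accounts for one reference; more is a pin */
		return (page_count(page) - extra) > page_mapcount(page);
	}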
@@ -2379,24 +2432,30 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
 			migrate->src[i] |= MIGRATE_PFN_LOCKED;
 		}
 
-		if (!PageLRU(page) && allow_drain) {
-			/* Drain CPU's pagevec */
-			lru_add_drain_all();
-			allow_drain = false;
-		}
+		/* ZONE_DEVICE pages are not on LRU */
+		if (!is_zone_device_page(page)) {
+			if (!PageLRU(page) && allow_drain) {
+				/* Drain CPU's pagevec */
+				lru_add_drain_all();
+				allow_drain = false;
+			}
 
-		if (isolate_lru_page(page)) {
-			if (remap) {
-				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
-				migrate->cpages--;
-				restore++;
-			} else {
-				migrate->src[i] = 0;
-				unlock_page(page);
-				migrate->cpages--;
-				put_page(page);
+			if (isolate_lru_page(page)) {
+				if (remap) {
+					migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+					migrate->cpages--;
+					restore++;
+				} else {
+					migrate->src[i] = 0;
+					unlock_page(page);
+					migrate->cpages--;
+					put_page(page);
+				}
+				continue;
 			}
-			continue;
+
+			/* Drop the reference we took in collect */
+			put_page(page);
 		}
 
 		if (!migrate_vma_check_page(page)) {
@@ -2405,14 +2464,19 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
 				migrate->cpages--;
 				restore++;
 
-				get_page(page);
-				putback_lru_page(page);
+				if (!is_zone_device_page(page)) {
+					get_page(page);
+					putback_lru_page(page);
+				}
 			} else {
 				migrate->src[i] = 0;
 				unlock_page(page);
 				migrate->cpages--;
 
-				putback_lru_page(page);
+				if (!is_zone_device_page(page))
+					putback_lru_page(page);
+				else
+					put_page(page);
 			}
 		}
 	}
@@ -2483,7 +2547,10 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
 		unlock_page(page);
 		restore--;
 
-		putback_lru_page(page);
+		if (is_zone_device_page(page))
+			put_page(page);
+		else
+			putback_lru_page(page);
 	}
 }
 
@@ -2514,6 +2581,26 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
 
 		mapping = page_mapping(page);
 
+		if (is_zone_device_page(newpage)) {
+			if (is_device_private_page(newpage)) {
+				/*
+				 * For now only support private anonymous when
+				 * migrating to un-addressable device memory.
+				 */
+				if (mapping) {
+					migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+					continue;
+				}
+			} else {
+				/*
+				 * Other types of ZONE_DEVICE page are not
+				 * supported.
+				 */
+				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+				continue;
+			}
+		}
+
 		r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY);
 		if (r != MIGRATEPAGE_SUCCESS)
 			migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
@@ -2554,11 +2641,17 @@ static void migrate_vma_finalize(struct migrate_vma *migrate)
 		unlock_page(page);
 		migrate->cpages--;
 
-		putback_lru_page(page);
+		if (is_zone_device_page(page))
+			put_page(page);
+		else
+			putback_lru_page(page);
 
 		if (newpage != page) {
 			unlock_page(newpage);
-			putback_lru_page(newpage);
+			if (is_zone_device_page(newpage))
+				put_page(newpage);
+			else
+				putback_lru_page(newpage);
 		}
 	}
 }
mm/page_vma_mapped.c

@@ -48,6 +48,7 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
 		if (!is_swap_pte(*pvmw->pte))
 			return false;
 		entry = pte_to_swp_entry(*pvmw->pte);
+
 		if (!is_migration_entry(entry))
 			return false;
 		if (migration_entry_to_page(entry) - pvmw->page >=
@@ -60,6 +61,15 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
 		WARN_ON_ONCE(1);
 #endif
 	} else {
+		if (is_swap_pte(*pvmw->pte)) {
+			swp_entry_t entry;
+
+			entry = pte_to_swp_entry(*pvmw->pte);
+			if (is_device_private_entry(entry) &&
+			    device_private_entry_to_page(entry) == pvmw->page)
+				return true;
+		}
+
 		if (!pte_present(*pvmw->pte))
 			return false;
 
							
								
								
									
mm/rmap.c

@@ -63,6 +63,7 @@
 #include <linux/hugetlb.h>
 #include <linux/backing-dev.h>
 #include <linux/page_idle.h>
+#include <linux/memremap.h>
 
 #include <asm/tlbflush.h>
 
@@ -1346,6 +1347,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
 		return true;
 
+	if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
+	    is_zone_device_page(page) && !is_device_private_page(page))
+		return true;
+
 	if (flags & TTU_SPLIT_HUGE_PMD) {
 		split_huge_pmd_address(vma, address,
 				flags & TTU_SPLIT_FREEZE, page);
@@ -1403,6 +1408,27 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		address = pvmw.address;
 
 
+		if (IS_ENABLED(CONFIG_MIGRATION) &&
+		    (flags & TTU_MIGRATION) &&
+		    is_zone_device_page(page)) {
+			swp_entry_t entry;
+			pte_t swp_pte;
+
+			pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
+
+			/*
+			 * Store the pfn of the page in a special migration
+			 * pte. do_swap_page() will wait until the migration
+			 * pte is removed and then restart fault handling.
+			 */
+			entry = make_migration_entry(page, 0);
+			swp_pte = swp_entry_to_pte(entry);
+			if (pte_soft_dirty(pteval))
+				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
+			goto discard;
+		}
+
 		if (!(flags & TTU_IGNORE_ACCESS)) {
 			if (ptep_clear_flush_young_notify(vma, address,
 						pvmw.pte)) {
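For context on who drives this new branch: migrate_vma_unmap() in mm/migrate.c calls try_to_unmap() with migration flags, which is what routes device-private pages through the code above. A condensed sketch of that caller side (migrate_vma_unmap_one is an invented name for the per-page step, and the snippet is quoted from memory of the same kernel, so treat it as a sketch):

	static void migrate_vma_unmap_one(struct page *page)
	{
		int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK |
			    TTU_IGNORE_ACCESS;

		/* installs the special migration pte for mapped pages */
		if (page_mapped(page))
			try_to_unmap(page, flags);
	}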