mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	mm/device-public-memory: device memory cache coherent with CPU
Platforms with an advanced system bus (like CAPI or CCIX) allow device memory to be accessible from the CPU in a cache coherent fashion. Add a new type of ZONE_DEVICE to represent such memory. The use cases are the same as for the un-addressable device memory but without all the corner cases. Link: http://lkml.kernel.org/r/20170817000548.32038-19-jglisse@redhat.com Signed-off-by: Jérôme Glisse <jglisse@redhat.com> Cc: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Balbir Singh <bsingharora@gmail.com> Cc: David Nellans <dnellans@nvidia.com> Cc: Evgeny Baskakov <ebaskakov@nvidia.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Mark Hairgrove <mhairgrove@nvidia.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Sherry Cheung <SCheung@nvidia.com> Cc: Subhash Gutti <sgutti@nvidia.com> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> Cc: Bob Liu <liubo95@huawei.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									8315ada7f0
								
							
						
					
					
						commit
						df6ad69838
					
				
					 14 changed files with 158 additions and 46 deletions
				
			
		| 
						 | 
					@ -1267,7 +1267,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
 | 
				
			||||||
		if (pm->show_pfn)
 | 
							if (pm->show_pfn)
 | 
				
			||||||
			frame = pte_pfn(pte);
 | 
								frame = pte_pfn(pte);
 | 
				
			||||||
		flags |= PM_PRESENT;
 | 
							flags |= PM_PRESENT;
 | 
				
			||||||
		page = vm_normal_page(vma, addr, pte);
 | 
							page = _vm_normal_page(vma, addr, pte, true);
 | 
				
			||||||
		if (pte_soft_dirty(pte))
 | 
							if (pte_soft_dirty(pte))
 | 
				
			||||||
			flags |= PM_SOFT_DIRTY;
 | 
								flags |= PM_SOFT_DIRTY;
 | 
				
			||||||
	} else if (is_swap_pte(pte)) {
 | 
						} else if (is_swap_pte(pte)) {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -327,7 +327,7 @@ int hmm_vma_fault(struct vm_area_struct *vma,
 | 
				
			||||||
#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 | 
					#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
 | 
					#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
 | 
				
			||||||
struct hmm_devmem;
 | 
					struct hmm_devmem;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma,
 | 
					struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma,
 | 
				
			||||||
| 
						 | 
					@ -494,7 +494,7 @@ struct hmm_device {
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
struct hmm_device *hmm_device_new(void *drvdata);
 | 
					struct hmm_device *hmm_device_new(void *drvdata);
 | 
				
			||||||
void hmm_device_put(struct hmm_device *hmm_device);
 | 
					void hmm_device_put(struct hmm_device *hmm_device);
 | 
				
			||||||
#endif /* IS_ENABLED(CONFIG_DEVICE_PRIVATE) */
 | 
					#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Below are for HMM internal use only! Not to be used by device driver! */
 | 
					/* Below are for HMM internal use only! Not to be used by device driver! */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -131,6 +131,7 @@ enum {
 | 
				
			||||||
	IORES_DESC_PERSISTENT_MEMORY		= 4,
 | 
						IORES_DESC_PERSISTENT_MEMORY		= 4,
 | 
				
			||||||
	IORES_DESC_PERSISTENT_MEMORY_LEGACY	= 5,
 | 
						IORES_DESC_PERSISTENT_MEMORY_LEGACY	= 5,
 | 
				
			||||||
	IORES_DESC_DEVICE_PRIVATE_MEMORY	= 6,
 | 
						IORES_DESC_DEVICE_PRIVATE_MEMORY	= 6,
 | 
				
			||||||
 | 
						IORES_DESC_DEVICE_PUBLIC_MEMORY		= 7,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* helpers to define resources */
 | 
					/* helpers to define resources */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -57,10 +57,18 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * A more complete discussion of unaddressable memory may be found in
 | 
					 * A more complete discussion of unaddressable memory may be found in
 | 
				
			||||||
 * include/linux/hmm.h and Documentation/vm/hmm.txt.
 | 
					 * include/linux/hmm.h and Documentation/vm/hmm.txt.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * MEMORY_DEVICE_PUBLIC:
 | 
				
			||||||
 | 
					 * Device memory that is cache coherent from device and CPU point of view. This
 | 
				
			||||||
 | 
					 * is used on platforms that have an advanced system bus (like CAPI or CCIX). A
 | 
				
			||||||
 | 
					 * driver can hotplug the device memory using ZONE_DEVICE and with that memory
 | 
				
			||||||
 | 
					 * type. Any page of a process can be migrated to such memory. However no one
 | 
				
			||||||
 | 
					 * should be allowed to pin such memory so that it can always be evicted.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
enum memory_type {
 | 
					enum memory_type {
 | 
				
			||||||
	MEMORY_DEVICE_HOST = 0,
 | 
						MEMORY_DEVICE_HOST = 0,
 | 
				
			||||||
	MEMORY_DEVICE_PRIVATE,
 | 
						MEMORY_DEVICE_PRIVATE,
 | 
				
			||||||
 | 
						MEMORY_DEVICE_PUBLIC,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					@ -92,6 +100,8 @@ enum memory_type {
 | 
				
			||||||
 * The page_free() callback is called once the page refcount reaches 1
 | 
					 * The page_free() callback is called once the page refcount reaches 1
 | 
				
			||||||
 * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug.
 | 
					 * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug.
 | 
				
			||||||
 * This allows the device driver to implement its own memory management.)
 | 
					 * This allows the device driver to implement its own memory management.)
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * For MEMORY_DEVICE_PUBLIC only the page_free() callback matters.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
typedef int (*dev_page_fault_t)(struct vm_area_struct *vma,
 | 
					typedef int (*dev_page_fault_t)(struct vm_area_struct *vma,
 | 
				
			||||||
				unsigned long addr,
 | 
									unsigned long addr,
 | 
				
			||||||
| 
						 | 
					@ -134,6 +144,12 @@ static inline bool is_device_private_page(const struct page *page)
 | 
				
			||||||
	return is_zone_device_page(page) &&
 | 
						return is_zone_device_page(page) &&
 | 
				
			||||||
		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
 | 
							page->pgmap->type == MEMORY_DEVICE_PRIVATE;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline bool is_device_public_page(const struct page *page)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return is_zone_device_page(page) &&
 | 
				
			||||||
 | 
							page->pgmap->type == MEMORY_DEVICE_PUBLIC;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
static inline void *devm_memremap_pages(struct device *dev,
 | 
					static inline void *devm_memremap_pages(struct device *dev,
 | 
				
			||||||
		struct resource *res, struct percpu_ref *ref,
 | 
							struct resource *res, struct percpu_ref *ref,
 | 
				
			||||||
| 
						 | 
					@ -157,6 +173,11 @@ static inline bool is_device_private_page(const struct page *page)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return false;
 | 
						return false;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline bool is_device_public_page(const struct page *page)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return false;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -800,15 +800,16 @@ static inline bool is_zone_device_page(const struct page *page)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_DEVICE_PRIVATE
 | 
					#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
 | 
				
			||||||
void put_zone_device_private_page(struct page *page);
 | 
					void put_zone_device_private_or_public_page(struct page *page);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
static inline void put_zone_device_private_page(struct page *page)
 | 
					static inline void put_zone_device_private_or_public_page(struct page *page)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline bool is_device_private_page(const struct page *page);
 | 
					static inline bool is_device_private_page(const struct page *page);
 | 
				
			||||||
 | 
					static inline bool is_device_public_page(const struct page *page);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
DECLARE_STATIC_KEY_FALSE(device_private_key);
 | 
					DECLARE_STATIC_KEY_FALSE(device_private_key);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -834,8 +835,9 @@ static inline void put_page(struct page *page)
 | 
				
			||||||
	 * include/linux/memremap.h and HMM for details.
 | 
						 * include/linux/memremap.h and HMM for details.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (static_branch_unlikely(&device_private_key) &&
 | 
						if (static_branch_unlikely(&device_private_key) &&
 | 
				
			||||||
	    unlikely(is_device_private_page(page))) {
 | 
						    unlikely(is_device_private_page(page) ||
 | 
				
			||||||
		put_zone_device_private_page(page);
 | 
							     is_device_public_page(page))) {
 | 
				
			||||||
 | 
							put_zone_device_private_or_public_page(page);
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1224,8 +1226,10 @@ struct zap_details {
 | 
				
			||||||
	pgoff_t last_index;			/* Highest page->index to unmap */
 | 
						pgoff_t last_index;			/* Highest page->index to unmap */
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 | 
					struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 | 
				
			||||||
		pte_t pte);
 | 
								     pte_t pte, bool with_public_device);
 | 
				
			||||||
 | 
					#define vm_normal_page(vma, addr, pte) _vm_normal_page(vma, addr, pte, false)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
 | 
					struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
 | 
				
			||||||
				pmd_t pmd);
 | 
									pmd_t pmd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -501,8 +501,8 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
 | 
				
			||||||
#endif /* CONFIG_ZONE_DEVICE */
 | 
					#endif /* CONFIG_ZONE_DEVICE */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_DEVICE_PRIVATE
 | 
					#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
 | 
				
			||||||
void put_zone_device_private_page(struct page *page)
 | 
					void put_zone_device_private_or_public_page(struct page *page)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int count = page_ref_dec_return(page);
 | 
						int count = page_ref_dec_return(page);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -522,5 +522,5 @@ void put_zone_device_private_page(struct page *page)
 | 
				
			||||||
	} else if (!count)
 | 
						} else if (!count)
 | 
				
			||||||
		__put_page(page);
 | 
							__put_page(page);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL(put_zone_device_private_page);
 | 
					EXPORT_SYMBOL(put_zone_device_private_or_public_page);
 | 
				
			||||||
#endif /* CONFIG_DEVICE_PRIVATE */
 | 
					#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										11
									
								
								mm/Kconfig
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								mm/Kconfig
									
									
									
									
									
								
							| 
						 | 
					@ -720,12 +720,23 @@ config HMM_MIRROR
 | 
				
			||||||
config DEVICE_PRIVATE
 | 
					config DEVICE_PRIVATE
 | 
				
			||||||
	bool "Unaddressable device memory (GPU memory, ...)"
 | 
						bool "Unaddressable device memory (GPU memory, ...)"
 | 
				
			||||||
	depends on ARCH_HAS_HMM
 | 
						depends on ARCH_HAS_HMM
 | 
				
			||||||
 | 
						select HMM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	help
 | 
						help
 | 
				
			||||||
	  Allows creation of struct pages to represent unaddressable device
 | 
						  Allows creation of struct pages to represent unaddressable device
 | 
				
			||||||
	  memory; i.e., memory that is only accessible from the device (or
 | 
						  memory; i.e., memory that is only accessible from the device (or
 | 
				
			||||||
	  group of devices). You likely also want to select HMM_MIRROR.
 | 
						  group of devices). You likely also want to select HMM_MIRROR.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					config DEVICE_PUBLIC
 | 
				
			||||||
 | 
						bool "Addressable device memory (like GPU memory)"
 | 
				
			||||||
 | 
						depends on ARCH_HAS_HMM
 | 
				
			||||||
 | 
						select HMM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						help
 | 
				
			||||||
 | 
						  Allows creation of struct pages to represent addressable device
 | 
				
			||||||
 | 
						  memory; i.e., memory that is accessible from both the device and
 | 
				
			||||||
 | 
						  the CPU
 | 
				
			||||||
 | 
					
 | 
				
			||||||
config FRAME_VECTOR
 | 
					config FRAME_VECTOR
 | 
				
			||||||
	bool
 | 
						bool
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										7
									
								
								mm/gup.c
									
									
									
									
									
								
							
							
						
						
									
										7
									
								
								mm/gup.c
									
									
									
									
									
								
							| 
						 | 
					@ -456,6 +456,13 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
 | 
				
			||||||
		if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
 | 
							if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
 | 
				
			||||||
			goto unmap;
 | 
								goto unmap;
 | 
				
			||||||
		*page = pte_page(*pte);
 | 
							*page = pte_page(*pte);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * This should never happen (a device public page in the gate
 | 
				
			||||||
 | 
							 * area).
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							if (is_device_public_page(*page))
 | 
				
			||||||
 | 
								goto unmap;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	get_page(*page);
 | 
						get_page(*page);
 | 
				
			||||||
out:
 | 
					out:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										4
									
								
								mm/hmm.c
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								mm/hmm.c
									
									
									
									
									
								
							| 
						 | 
					@ -737,7 +737,7 @@ EXPORT_SYMBOL(hmm_vma_fault);
 | 
				
			||||||
#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 | 
					#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
 | 
					#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
 | 
				
			||||||
struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma,
 | 
					struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma,
 | 
				
			||||||
				       unsigned long addr)
 | 
									       unsigned long addr)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -1177,4 +1177,4 @@ static int __init hmm_init(void)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
device_initcall(hmm_init);
 | 
					device_initcall(hmm_init);
 | 
				
			||||||
#endif /* IS_ENABLED(CONFIG_DEVICE_PRIVATE) */
 | 
					#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -355,7 +355,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		page = vm_normal_page(vma, addr, ptent);
 | 
							page = _vm_normal_page(vma, addr, ptent, true);
 | 
				
			||||||
		if (!page)
 | 
							if (!page)
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4623,10 +4623,11 @@ static int mem_cgroup_move_account(struct page *page,
 | 
				
			||||||
 *   2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
 | 
					 *   2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
 | 
				
			||||||
 *     target for charge migration. if @target is not NULL, the entry is stored
 | 
					 *     target for charge migration. if @target is not NULL, the entry is stored
 | 
				
			||||||
 *     in target->ent.
 | 
					 *     in target->ent.
 | 
				
			||||||
 *   3(MC_TARGET_DEVICE): like MC_TARGET_PAGE  but page is MEMORY_DEVICE_PRIVATE
 | 
					 *   3(MC_TARGET_DEVICE): like MC_TARGET_PAGE  but page is MEMORY_DEVICE_PUBLIC
 | 
				
			||||||
 *     (so ZONE_DEVICE page and thus not on the lru). For now we such page is
 | 
					 *     or MEMORY_DEVICE_PRIVATE (so ZONE_DEVICE page and thus not on the lru).
 | 
				
			||||||
 *     charge like a regular page would be as for all intent and purposes it is
 | 
					 *     For now such a page is charged like a regular page would be as for all
 | 
				
			||||||
 *     just special memory taking the place of a regular page.
 | 
					 *     intents and purposes it is just special memory taking the place of a
 | 
				
			||||||
 | 
					 *     regular page.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 *     See Documentations/vm/hmm.txt and include/linux/hmm.h
 | 
					 *     See Documentations/vm/hmm.txt and include/linux/hmm.h
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
| 
						 | 
					@ -4657,7 +4658,8 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		if (page->mem_cgroup == mc.from) {
 | 
							if (page->mem_cgroup == mc.from) {
 | 
				
			||||||
			ret = MC_TARGET_PAGE;
 | 
								ret = MC_TARGET_PAGE;
 | 
				
			||||||
			if (is_device_private_page(page))
 | 
								if (is_device_private_page(page) ||
 | 
				
			||||||
 | 
								    is_device_public_page(page))
 | 
				
			||||||
				ret = MC_TARGET_DEVICE;
 | 
									ret = MC_TARGET_DEVICE;
 | 
				
			||||||
			if (target)
 | 
								if (target)
 | 
				
			||||||
				target->page = page;
 | 
									target->page = page;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										44
									
								
								mm/memory.c
									
									
									
									
									
								
							
							
						
						
									
										44
									
								
								mm/memory.c
									
									
									
									
									
								
							| 
						 | 
					@ -818,8 +818,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
# define HAVE_PTE_SPECIAL 0
 | 
					# define HAVE_PTE_SPECIAL 0
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 | 
					struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 | 
				
			||||||
				pte_t pte)
 | 
								     pte_t pte, bool with_public_device)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long pfn = pte_pfn(pte);
 | 
						unsigned long pfn = pte_pfn(pte);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -830,7 +830,30 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 | 
				
			||||||
			return vma->vm_ops->find_special_page(vma, addr);
 | 
								return vma->vm_ops->find_special_page(vma, addr);
 | 
				
			||||||
		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
 | 
							if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
 | 
				
			||||||
			return NULL;
 | 
								return NULL;
 | 
				
			||||||
		if (!is_zero_pfn(pfn))
 | 
							if (is_zero_pfn(pfn))
 | 
				
			||||||
 | 
								return NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * Device public pages are special pages (they are ZONE_DEVICE
 | 
				
			||||||
 | 
							 * pages but different from persistent memory). They behave
 | 
				
			||||||
 | 
							 * almost like normal pages. The difference is that they are
 | 
				
			||||||
 | 
							 * not on the lru and thus should never be involved with any-
 | 
				
			||||||
 | 
							 * thing that involves lru manipulation (mlock, numa balancing,
 | 
				
			||||||
 | 
							 * ...).
 | 
				
			||||||
 | 
							 *
 | 
				
			||||||
 | 
							 * This is why we still want to return NULL for such page from
 | 
				
			||||||
 | 
							 * vm_normal_page() so that we do not have to special case all
 | 
				
			||||||
 | 
							 * call site of vm_normal_page().
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							if (likely(pfn < highest_memmap_pfn)) {
 | 
				
			||||||
 | 
								struct page *page = pfn_to_page(pfn);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								if (is_device_public_page(page)) {
 | 
				
			||||||
 | 
									if (with_public_device)
 | 
				
			||||||
 | 
										return page;
 | 
				
			||||||
 | 
									return NULL;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
		print_bad_pte(vma, addr, pte, NULL);
 | 
							print_bad_pte(vma, addr, pte, NULL);
 | 
				
			||||||
		return NULL;
 | 
							return NULL;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -1012,6 +1035,19 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 | 
				
			||||||
		get_page(page);
 | 
							get_page(page);
 | 
				
			||||||
		page_dup_rmap(page, false);
 | 
							page_dup_rmap(page, false);
 | 
				
			||||||
		rss[mm_counter(page)]++;
 | 
							rss[mm_counter(page)]++;
 | 
				
			||||||
 | 
						} else if (pte_devmap(pte)) {
 | 
				
			||||||
 | 
							page = pte_page(pte);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * Cache coherent device memory behaves like a regular page and
 | 
				
			||||||
 | 
							 * not like a persistent memory page. For more information see
 | 
				
			||||||
 | 
							 * MEMORY_DEVICE_CACHE_COHERENT in memory_hotplug.h
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							if (is_device_public_page(page)) {
 | 
				
			||||||
 | 
								get_page(page);
 | 
				
			||||||
 | 
								page_dup_rmap(page, false);
 | 
				
			||||||
 | 
								rss[mm_counter(page)]++;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
out_set_pte:
 | 
					out_set_pte:
 | 
				
			||||||
| 
						 | 
					@ -1267,7 +1303,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 | 
				
			||||||
		if (pte_present(ptent)) {
 | 
							if (pte_present(ptent)) {
 | 
				
			||||||
			struct page *page;
 | 
								struct page *page;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			page = vm_normal_page(vma, addr, ptent);
 | 
								page = _vm_normal_page(vma, addr, ptent, true);
 | 
				
			||||||
			if (unlikely(details) && page) {
 | 
								if (unlikely(details) && page) {
 | 
				
			||||||
				/*
 | 
									/*
 | 
				
			||||||
				 * unmap_shared_mapping_pages() wants to
 | 
									 * unmap_shared_mapping_pages() wants to
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										43
									
								
								mm/migrate.c
									
									
									
									
									
								
							
							
						
						
									
										43
									
								
								mm/migrate.c
									
									
									
									
									
								
							| 
						 | 
					@ -36,6 +36,7 @@
 | 
				
			||||||
#include <linux/hugetlb.h>
 | 
					#include <linux/hugetlb.h>
 | 
				
			||||||
#include <linux/hugetlb_cgroup.h>
 | 
					#include <linux/hugetlb_cgroup.h>
 | 
				
			||||||
#include <linux/gfp.h>
 | 
					#include <linux/gfp.h>
 | 
				
			||||||
 | 
					#include <linux/pfn_t.h>
 | 
				
			||||||
#include <linux/memremap.h>
 | 
					#include <linux/memremap.h>
 | 
				
			||||||
#include <linux/userfaultfd_k.h>
 | 
					#include <linux/userfaultfd_k.h>
 | 
				
			||||||
#include <linux/balloon_compaction.h>
 | 
					#include <linux/balloon_compaction.h>
 | 
				
			||||||
| 
						 | 
					@ -239,10 +240,14 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 | 
				
			||||||
		if (is_write_migration_entry(entry))
 | 
							if (is_write_migration_entry(entry))
 | 
				
			||||||
			pte = maybe_mkwrite(pte, vma);
 | 
								pte = maybe_mkwrite(pte, vma);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (unlikely(is_zone_device_page(new)) &&
 | 
							if (unlikely(is_zone_device_page(new))) {
 | 
				
			||||||
		    is_device_private_page(new)) {
 | 
								if (is_device_private_page(new)) {
 | 
				
			||||||
				entry = make_device_private_entry(new, pte_write(pte));
 | 
									entry = make_device_private_entry(new, pte_write(pte));
 | 
				
			||||||
				pte = swp_entry_to_pte(entry);
 | 
									pte = swp_entry_to_pte(entry);
 | 
				
			||||||
 | 
								} else if (is_device_public_page(new)) {
 | 
				
			||||||
 | 
									pte = pte_mkdevmap(pte);
 | 
				
			||||||
 | 
									flush_dcache_page(new);
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
		} else
 | 
							} else
 | 
				
			||||||
			flush_dcache_page(new);
 | 
								flush_dcache_page(new);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -437,12 +442,11 @@ int migrate_page_move_mapping(struct address_space *mapping,
 | 
				
			||||||
	void **pslot;
 | 
						void **pslot;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * ZONE_DEVICE pages have 1 refcount always held by their device
 | 
						 * Device public or private pages have an extra refcount as they are
 | 
				
			||||||
	 *
 | 
						 * ZONE_DEVICE pages.
 | 
				
			||||||
	 * Note that DAX memory will never reach that point as it does not have
 | 
					 | 
				
			||||||
	 * the MEMORY_DEVICE_ALLOW_MIGRATE flag set (see memory_hotplug.h).
 | 
					 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	expected_count += is_zone_device_page(page);
 | 
						expected_count += is_device_private_page(page);
 | 
				
			||||||
 | 
						expected_count += is_device_public_page(page);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!mapping) {
 | 
						if (!mapping) {
 | 
				
			||||||
		/* Anonymous page without mapping */
 | 
							/* Anonymous page without mapping */
 | 
				
			||||||
| 
						 | 
					@ -2123,7 +2127,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif /* CONFIG_NUMA */
 | 
					#endif /* CONFIG_NUMA */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
struct migrate_vma {
 | 
					struct migrate_vma {
 | 
				
			||||||
	struct vm_area_struct	*vma;
 | 
						struct vm_area_struct	*vma;
 | 
				
			||||||
	unsigned long		*dst;
 | 
						unsigned long		*dst;
 | 
				
			||||||
| 
						 | 
					@ -2263,7 +2266,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 | 
				
			||||||
				pfn = 0;
 | 
									pfn = 0;
 | 
				
			||||||
				goto next;
 | 
									goto next;
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			page = vm_normal_page(migrate->vma, addr, pte);
 | 
								page = _vm_normal_page(migrate->vma, addr, pte, true);
 | 
				
			||||||
			mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
 | 
								mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
 | 
				
			||||||
			mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
 | 
								mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					@ -2406,10 +2409,19 @@ static bool migrate_vma_check_page(struct page *page)
 | 
				
			||||||
		if (is_device_private_page(page))
 | 
							if (is_device_private_page(page))
 | 
				
			||||||
			return true;
 | 
								return true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/* Other ZONE_DEVICE memory type are not supported */
 | 
							/*
 | 
				
			||||||
 | 
							 * Only allow device public page to be migrated and account for
 | 
				
			||||||
 | 
							 * the extra reference count implied by ZONE_DEVICE pages.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							if (!is_device_public_page(page))
 | 
				
			||||||
			return false;
 | 
								return false;
 | 
				
			||||||
 | 
							extra++;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* For file-backed pages */
 | 
				
			||||||
 | 
						if (page_mapping(page))
 | 
				
			||||||
 | 
							extra += 1 + page_has_private(page);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if ((page_count(page) - extra) > page_mapcount(page))
 | 
						if ((page_count(page) - extra) > page_mapcount(page))
 | 
				
			||||||
		return false;
 | 
							return false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2647,11 +2659,18 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	__SetPageUptodate(page);
 | 
						__SetPageUptodate(page);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (is_zone_device_page(page) && is_device_private_page(page)) {
 | 
						if (is_zone_device_page(page)) {
 | 
				
			||||||
 | 
							if (is_device_private_page(page)) {
 | 
				
			||||||
			swp_entry_t swp_entry;
 | 
								swp_entry_t swp_entry;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE);
 | 
								swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE);
 | 
				
			||||||
			entry = swp_entry_to_pte(swp_entry);
 | 
								entry = swp_entry_to_pte(swp_entry);
 | 
				
			||||||
 | 
							} else if (is_device_public_page(page)) {
 | 
				
			||||||
 | 
								entry = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot)));
 | 
				
			||||||
 | 
								if (vma->vm_flags & VM_WRITE)
 | 
				
			||||||
 | 
									entry = pte_mkwrite(pte_mkdirty(entry));
 | 
				
			||||||
 | 
								entry = pte_mkdevmap(entry);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		entry = mk_pte(page, vma->vm_page_prot);
 | 
							entry = mk_pte(page, vma->vm_page_prot);
 | 
				
			||||||
		if (vma->vm_flags & VM_WRITE)
 | 
							if (vma->vm_flags & VM_WRITE)
 | 
				
			||||||
| 
						 | 
					@ -2768,7 +2787,7 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
 | 
				
			||||||
					migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
 | 
										migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
 | 
				
			||||||
					continue;
 | 
										continue;
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			} else {
 | 
								} else if (!is_device_public_page(newpage)) {
 | 
				
			||||||
				/*
 | 
									/*
 | 
				
			||||||
				 * Other types of ZONE_DEVICE page are not
 | 
									 * Other types of ZONE_DEVICE page are not
 | 
				
			||||||
				 * supported.
 | 
									 * supported.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										11
									
								
								mm/swap.c
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								mm/swap.c
									
									
									
									
									
								
							| 
						 | 
					@ -765,6 +765,17 @@ void release_pages(struct page **pages, int nr, bool cold)
 | 
				
			||||||
		if (is_huge_zero_page(page))
 | 
							if (is_huge_zero_page(page))
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Device public pages cannot be huge pages */
 | 
				
			||||||
 | 
							if (is_device_public_page(page)) {
 | 
				
			||||||
 | 
								if (locked_pgdat) {
 | 
				
			||||||
 | 
									spin_unlock_irqrestore(&locked_pgdat->lru_lock,
 | 
				
			||||||
 | 
											       flags);
 | 
				
			||||||
 | 
									locked_pgdat = NULL;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								put_zone_device_private_or_public_page(page);
 | 
				
			||||||
 | 
								continue;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		page = compound_head(page);
 | 
							page = compound_head(page);
 | 
				
			||||||
		if (!put_page_testzero(page))
 | 
							if (!put_page_testzero(page))
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue