mm: new follow_pfnmap API
Introduce a pair of APIs to follow pfn mappings and retrieve entry
information.  It is very similar to what follow_pte() does, but differs in
that it also recognizes huge pfn mappings.

Link: https://lkml.kernel.org/r/20240826204353.2228736-10-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Gavin Shan <gshan@redhat.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Niklas Schnelle <schnelle@linux.ibm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
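
For illustration, here is a minimal sketch of how a caller might use the
new pair (lookup_user_pfn() is a hypothetical helper, not part of this
commit; it assumes the caller already holds the mmap lock for read):

/* Hypothetical example only: resolve the PFN behind a pfnmap address */
static int lookup_user_pfn(struct vm_area_struct *vma, unsigned long addr,
			   unsigned long *pfn, bool *writable)
{
	struct follow_pfnmap_args args = { .vma = vma, .address = addr };
	int ret;

	/* The mmap lock must be held across start()..end() */
	mmap_assert_locked(vma->vm_mm);

	ret = follow_pfnmap_start(&args);
	if (ret)
		return ret;

	/* Output fields are only stable until follow_pfnmap_end() */
	*pfn = args.pfn;
	*writable = args.writable;

	follow_pfnmap_end(&args);
	return 0;
}

Anything kept past follow_pfnmap_end() needs its own synchronization
against page table updates, as the kernel-doc below spells out.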
parent 0515e022e1
commit 6da8e9634b

2 changed files with 181 additions and 0 deletions
include/linux/mm.h
@@ -2373,6 +2373,37 @@ int follow_pte(struct vm_area_struct *vma, unsigned long address,
 int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 			void *buf, int len, int write);
 
+struct follow_pfnmap_args {
+	/**
+	 * Inputs:
+	 * @vma: Pointer to the vm_area_struct to walk
+	 * @address: the virtual address to walk
+	 */
+	struct vm_area_struct *vma;
+	unsigned long address;
+	/**
+	 * Internals:
+	 *
+	 * The caller shouldn't touch any of these.
+	 */
+	spinlock_t *lock;
+	pte_t *ptep;
+	/**
+	 * Outputs:
+	 *
+	 * @pfn: the PFN of the address
+	 * @pgprot: the pgprot_t of the mapping
+	 * @writable: whether the mapping is writable
+	 * @special: whether the mapping is a special mapping (real PFN maps)
+	 */
+	unsigned long pfn;
+	pgprot_t pgprot;
+	bool writable;
+	bool special;
+};
+int follow_pfnmap_start(struct follow_pfnmap_args *args);
+void follow_pfnmap_end(struct follow_pfnmap_args *args);
+
 extern void truncate_pagecache(struct inode *inode, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
 void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);

mm/memory.c
@@ -6172,6 +6172,156 @@ int follow_pte(struct vm_area_struct *vma, unsigned long address,
 }
 EXPORT_SYMBOL_GPL(follow_pte);
 
+static inline void pfnmap_args_setup(struct follow_pfnmap_args *args,
+				     spinlock_t *lock, pte_t *ptep,
+				     pgprot_t pgprot, unsigned long pfn_base,
+				     unsigned long addr_mask, bool writable,
+				     bool special)
+{
+	args->lock = lock;
+	args->ptep = ptep;
+	args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT);
+	args->pgprot = pgprot;
+	args->writable = writable;
+	args->special = special;
+}
+
+static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma)
+{
+#ifdef CONFIG_LOCKDEP
+	struct address_space *mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL;
+
+	if (mapping)
+		lockdep_assert(lockdep_is_held(&mapping->i_mmap_rwsem) ||
+			       lockdep_is_held(&vma->vm_mm->mmap_lock));
+	else
+		lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock));
+#endif
+}
+
+/**
+ * follow_pfnmap_start() - Look up a pfn mapping at a user virtual address
+ * @args: Pointer to struct @follow_pfnmap_args
+ *
+ * The caller needs to set up args->vma and args->address to point to the
+ * virtual address to look up.  On a successful return, the results are
+ * stored in the output fields.
+ *
+ * Once the caller is done with the fields, it must invoke
+ * follow_pfnmap_end() to properly release the locks and resources taken
+ * by the lookup.
+ *
+ * Between the start() and end() calls, the results in @args are valid
+ * because the proper locks are held.  After end() is called, the fields
+ * of @args must not be accessed anymore.  Any further use of such
+ * information after end() requires the caller to synchronize against
+ * page table updates; otherwise it can create a security bug.
+ *
+ * If the PTE maps a refcounted page, callers are responsible for
+ * protecting against invalidation with MMU notifiers; otherwise access
+ * to the PFN at a later point in time can trigger use-after-free.
+ *
+ * Only IO mappings and raw PFN mappings are allowed.  The mmap semaphore
+ * should be taken for read, and it cannot be released before end() is
+ * invoked.
+ *
+ * This function must not be used to modify PTE content.
+ *
+ * Return: zero on success, negative otherwise.
+ */
+int follow_pfnmap_start(struct follow_pfnmap_args *args)
+{
+	struct vm_area_struct *vma = args->vma;
+	unsigned long address = args->address;
+	struct mm_struct *mm = vma->vm_mm;
+	spinlock_t *lock;
+	pgd_t *pgdp;
+	p4d_t *p4dp, p4d;
+	pud_t *pudp, pud;
+	pmd_t *pmdp, pmd;
+	pte_t *ptep, pte;
+
+	pfnmap_lockdep_assert(vma);
+
+	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+		goto out;
+
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		goto out;
+retry:
+	pgdp = pgd_offset(mm, address);
+	if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp)))
+		goto out;
+
+	p4dp = p4d_offset(pgdp, address);
+	p4d = READ_ONCE(*p4dp);
+	if (p4d_none(p4d) || unlikely(p4d_bad(p4d)))
+		goto out;
+
+	pudp = pud_offset(p4dp, address);
+	pud = READ_ONCE(*pudp);
+	if (pud_none(pud))
+		goto out;
+	if (pud_leaf(pud)) {
+		lock = pud_lock(mm, pudp);
+		/* Re-read under the lock; the entry may have changed */
+		pud = READ_ONCE(*pudp);
+		if (unlikely(!pud_leaf(pud))) {
+			spin_unlock(lock);
+			goto retry;
+		}
+		pfnmap_args_setup(args, lock, NULL, pud_pgprot(pud),
+				  pud_pfn(pud), PUD_MASK, pud_write(pud),
+				  pud_special(pud));
+		return 0;
+	}
+
+	pmdp = pmd_offset(pudp, address);
+	pmd = pmdp_get_lockless(pmdp);
+	if (pmd_leaf(pmd)) {
+		lock = pmd_lock(mm, pmdp);
+		/* Re-read under the lock; the entry may have changed */
+		pmd = pmdp_get(pmdp);
+		if (unlikely(!pmd_leaf(pmd))) {
+			spin_unlock(lock);
+			goto retry;
+		}
+		pfnmap_args_setup(args, lock, NULL, pmd_pgprot(pmd),
+				  pmd_pfn(pmd), PMD_MASK, pmd_write(pmd),
+				  pmd_special(pmd));
+		return 0;
+	}
+
+	ptep = pte_offset_map_lock(mm, pmdp, address, &lock);
+	if (!ptep)
+		goto out;
+	pte = ptep_get(ptep);
+	if (!pte_present(pte))
+		goto unlock;
+	pfnmap_args_setup(args, lock, ptep, pte_pgprot(pte),
+			  pte_pfn(pte), PAGE_MASK, pte_write(pte),
+			  pte_special(pte));
+	return 0;
+unlock:
+	pte_unmap_unlock(ptep, lock);
+out:
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(follow_pfnmap_start);
+
+/**
+ * follow_pfnmap_end(): End a follow_pfnmap_start() process
+ * @args: Pointer to struct @follow_pfnmap_args
+ *
+ * Must be used in pair with follow_pfnmap_start().  See the start()
+ * function above for more information.
+ */
+void follow_pfnmap_end(struct follow_pfnmap_args *args)
+{
+	if (args->lock)
+		spin_unlock(args->lock);
+	if (args->ptep)
+		pte_unmap(args->ptep);
+}
+EXPORT_SYMBOL_GPL(follow_pfnmap_end);
+
 #ifdef CONFIG_HAVE_IOREMAP_PROT
 /**
  * generic_access_phys - generic implementation for iomem mmap access
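
As a sanity check of the PFN arithmetic in pfnmap_args_setup(), here is a
standalone sketch with hypothetical values (PAGE_SHIFT and PMD_MASK are
redefined locally for illustration; this is not kernel code).  For a 2M
PMD leaf, the base PFN is adjusted by the sub-leaf offset, so the result
always names the exact 4K frame of the requested address:

/* Standalone illustration of the pfnmap_args_setup() PFN math */
#include <assert.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	21	/* 2M leaf, x86-64 style */
#define PMD_MASK	(~((1UL << PMD_SHIFT) - 1))

int main(void)
{
	unsigned long pfn_base = 0x100000;	/* PFN of the PMD leaf base */
	unsigned long address = 0x7f0000203000;	/* 0x3000 into the 2M region */

	/* Same expression as pfnmap_args_setup() with addr_mask == PMD_MASK */
	unsigned long pfn = pfn_base + ((address & ~PMD_MASK) >> PAGE_SHIFT);

	assert(pfn == 0x100003);	/* base PFN plus three 4K pages */
	return 0;
}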