	mm: new follow_pfnmap API
Introduce a pair of APIs to follow pfn mappings to get entry information.
It's very similar to what follow_pte() does, but different in that it
recognizes huge pfn mappings.

Link: https://lkml.kernel.org/r/20240826204353.2228736-10-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Gavin Shan <gshan@redhat.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Niklas Schnelle <schnelle@linux.ibm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 0515e022e1
commit 6da8e9634b

2 changed files with 181 additions and 0 deletions
include/linux/mm.h (31 additions)

@@ -2373,6 +2373,37 @@ int follow_pte(struct vm_area_struct *vma, unsigned long address,
int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
			void *buf, int len, int write);

struct follow_pfnmap_args {
	/**
	 * Inputs:
	 * @vma: Pointer to @vm_area_struct struct
	 * @address: the virtual address to walk
	 */
	struct vm_area_struct *vma;
	unsigned long address;
	/**
	 * Internals:
	 *
	 * The caller shouldn't touch any of these.
	 */
	spinlock_t *lock;
	pte_t *ptep;
	/**
	 * Outputs:
	 *
	 * @pfn: the PFN of the address
	 * @pgprot: the pgprot_t of the mapping
	 * @writable: whether the mapping is writable
	 * @special: whether the mapping is a special mapping (real PFN maps)
	 */
	unsigned long pfn;
	pgprot_t pgprot;
	bool writable;
	bool special;
};
int follow_pfnmap_start(struct follow_pfnmap_args *args);
void follow_pfnmap_end(struct follow_pfnmap_args *args);

extern void truncate_pagecache(struct inode *inode, loff_t new);
extern void truncate_setsize(struct inode *inode, loff_t newsize);
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
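
To make the calling convention concrete, here is a minimal, illustrative sketch of a kernel-side caller. It is not part of this patch; the function name and the way the outputs are consumed are hypothetical. It follows the contract documented in the follow_pfnmap_start() kernel-doc in mm/memory.c below: hold the mmap lock for read across the start()/end() pair and only trust the output fields while it is held.

#include <linux/mm.h>

/* Illustrative sketch only: resolve the PFN behind a VM_IO/VM_PFNMAP address. */
static int example_lookup_pfn(struct vm_area_struct *vma, unsigned long addr,
			      unsigned long *pfn, bool *writable)
{
	struct follow_pfnmap_args args = { .vma = vma, .address = addr };
	int ret;

	mmap_read_lock(vma->vm_mm);	/* keep held across start()/end() */
	ret = follow_pfnmap_start(&args);
	if (!ret) {
		/* Output fields are only valid until follow_pfnmap_end(). */
		*pfn = args.pfn;
		*writable = args.writable;
		follow_pfnmap_end(&args);
	}
	mmap_read_unlock(vma->vm_mm);
	return ret;
}

Any use of the copied PFN after follow_pfnmap_end() returns would need extra synchronization against page table updates, as the kernel-doc below warns.
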
mm/memory.c (150 additions)

@@ -6172,6 +6172,156 @@ int follow_pte(struct vm_area_struct *vma, unsigned long address,
}
EXPORT_SYMBOL_GPL(follow_pte);

static inline void pfnmap_args_setup(struct follow_pfnmap_args *args,
				     spinlock_t *lock, pte_t *ptep,
				     pgprot_t pgprot, unsigned long pfn_base,
				     unsigned long addr_mask, bool writable,
				     bool special)
{
	args->lock = lock;
	args->ptep = ptep;
	args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT);
	args->pgprot = pgprot;
	args->writable = writable;
	args->special = special;
}
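
/*
 * Example of the pfn math above, assuming 4 KiB base pages: for a 2 MiB
 * pmd leaf, addr_mask is PMD_MASK, so an address 0x3000 bytes into the
 * mapping yields args->pfn = pfn_base + 3; for a normal pte, addr_mask is
 * PAGE_MASK and the offset term is always zero.
 */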

static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma)
{
#ifdef CONFIG_LOCKDEP
	struct address_space *mapping = vma->vm_file->f_mapping;

	if (mapping)
		lockdep_assert(lockdep_is_held(&vma->vm_file->f_mapping->i_mmap_rwsem) ||
			       lockdep_is_held(&vma->vm_mm->mmap_lock));
	else
		lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock));
#endif
}
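
/*
 * The assertion above is satisfied by holding either the mmap lock or,
 * for a file-backed vma, the address_space's i_mmap_rwsem; per the
 * follow_pfnmap_start() kernel-doc below, the lock must stay held until
 * follow_pfnmap_end().
 */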

/**
 * follow_pfnmap_start() - Look up a pfn mapping at a user virtual address
 * @args: Pointer to struct @follow_pfnmap_args
 *
 * The caller needs to set up args->vma and args->address to point to the
 * virtual address as the target of such lookup.  On a successful return,
 * the results will be put into the other output fields.
 *
 * After the caller has finished using the fields, it must invoke
 * follow_pfnmap_end() to properly release the locks and resources of such
 * a lookup request.
 *
 * Between the start() and end() calls, the results in @args will be valid
 * as proper locks will be held.  After end() is called, all the fields in
 * @follow_pfnmap_args become invalid and must not be accessed further.
 * Further use of information copied out of them may require proper
 * synchronization by the caller with page table updates, otherwise it can
 * create a security bug.
 *
 * If the PTE maps a refcounted page, callers are responsible for protecting
 * against invalidation with MMU notifiers; otherwise access to the PFN at
 * a later point in time can trigger use-after-free.
 *
 * Only IO mappings and raw PFN mappings are allowed.  The mmap semaphore
 * should be taken for read, and it cannot be released before end() is
 * invoked.
 *
 * This function must not be used to modify PTE content.
 *
 * Return: zero on success, negative otherwise.
 */
int follow_pfnmap_start(struct follow_pfnmap_args *args)
{
	struct vm_area_struct *vma = args->vma;
	unsigned long address = args->address;
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *lock;
	pgd_t *pgdp;
	p4d_t *p4dp, p4d;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;
	pte_t *ptep, pte;

	pfnmap_lockdep_assert(vma);

	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
		goto out;

	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
		goto out;
retry:
	pgdp = pgd_offset(mm, address);
	if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp)))
		goto out;

	p4dp = p4d_offset(pgdp, address);
	p4d = READ_ONCE(*p4dp);
	if (p4d_none(p4d) || unlikely(p4d_bad(p4d)))
		goto out;

	pudp = pud_offset(p4dp, address);
	pud = READ_ONCE(*pudp);
	if (pud_none(pud))
		goto out;
	if (pud_leaf(pud)) {
		lock = pud_lock(mm, pudp);
		if (!unlikely(pud_leaf(pud))) {
			spin_unlock(lock);
			goto retry;
		}
		pfnmap_args_setup(args, lock, NULL, pud_pgprot(pud),
				  pud_pfn(pud), PUD_MASK, pud_write(pud),
				  pud_special(pud));
		return 0;
	}

	pmdp = pmd_offset(pudp, address);
	pmd = pmdp_get_lockless(pmdp);
	if (pmd_leaf(pmd)) {
		lock = pmd_lock(mm, pmdp);
		if (!unlikely(pmd_leaf(pmd))) {
			spin_unlock(lock);
			goto retry;
		}
		pfnmap_args_setup(args, lock, NULL, pmd_pgprot(pmd),
				  pmd_pfn(pmd), PMD_MASK, pmd_write(pmd),
				  pmd_special(pmd));
		return 0;
	}

	ptep = pte_offset_map_lock(mm, pmdp, address, &lock);
	if (!ptep)
		goto out;
	pte = ptep_get(ptep);
	if (!pte_present(pte))
		goto unlock;
	pfnmap_args_setup(args, lock, ptep, pte_pgprot(pte),
			  pte_pfn(pte), PAGE_MASK, pte_write(pte),
			  pte_special(pte));
	return 0;
unlock:
	pte_unmap_unlock(ptep, lock);
out:
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(follow_pfnmap_start);

/**
 * follow_pfnmap_end(): End a follow_pfnmap_start() process
 * @args: Pointer to struct @follow_pfnmap_args
 *
 * Must be used in pair with follow_pfnmap_start().  See the start() function
 * above for more information.
 */
void follow_pfnmap_end(struct follow_pfnmap_args *args)
{
	if (args->lock)
		spin_unlock(args->lock);
	if (args->ptep)
		pte_unmap(args->ptep);
}
EXPORT_SYMBOL_GPL(follow_pfnmap_end);
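
/*
 * Note: in the pud/pmd leaf paths of follow_pfnmap_start(), args->ptep is
 * left NULL, so follow_pfnmap_end() only drops the page table lock; in the
 * pte path it also unmaps the pte mapped by pte_offset_map_lock().
 */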

#ifdef CONFIG_HAVE_IOREMAP_PROT
/**
 * generic_access_phys - generic implementation for iomem mmap access