powerpc: implement the new page table range API

Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio().  Change
the PG_arch_1 (aka PG_dcache_dirty) flag from being per-page to per-folio.

[willy@infradead.org: re-export flush_dcache_icache_folio()]
  Link: https://lkml.kernel.org/r/ZMx1daYwvD9EM7Cv@casper.infradead.org
Link: https://lkml.kernel.org/r/20230802151406.3735276-22-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
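For orientation before the diff: a rough caller-side sketch of the new range API (illustrative only; example_map_folio() is a hypothetical helper, while set_ptes(), flush_dcache_folio() and update_mmu_cache_range() carry the signatures this patch adds). Instead of calling set_pte_at() once per page, generic code hands the architecture a run of nr consecutive pages from one folio:

/*
 * Hypothetical caller, not part of this patch -- shows how the three
 * new entry points fit together when mapping an nr-page folio.
 */
static void example_map_folio(struct vm_area_struct *vma, unsigned long addr,
			      pte_t *ptep, struct folio *folio, pgprot_t prot)
{
	unsigned int nr = folio_nr_pages(folio);
	pte_t pte = mk_pte(&folio->page, prot);

	flush_dcache_folio(folio);		/* PG_dcache_clean is now per-folio */
	set_ptes(vma->vm_mm, addr, ptep, pte, nr);	/* one call covers nr pages */
	update_mmu_cache_range(NULL, vma, addr, ptep, nr);	/* NULL vmf, as the compat macro does */
}

The architecture advances the PFN itself inside set_ptes() (see the loop added near the end of this diff), so callers no longer open-code the per-page iteration.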
parent e70bbca607
commit 9fee28baa6
11 changed files with 89 additions and 93 deletions
@@ -462,11 +462,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
 		     pgprot_val(pgprot));
 }
 
-static inline unsigned long pte_pfn(pte_t pte)
-{
-	return pte_val(pte) >> PTE_RPN_SHIFT;
-}
-
 /* Generic modifiers for PTE bits */
 static inline pte_t pte_wrprotect(pte_t pte)
 {
@@ -104,6 +104,7 @@
  * and every thing below PAGE_SHIFT;
  */
 #define PTE_RPN_MASK	(((1UL << _PAGE_PA_MAX) - 1) & (PAGE_MASK))
+#define PTE_RPN_SHIFT	PAGE_SHIFT
 /*
  * set of bits not changed in pmd_modify. Even though we have hash specific bits
  * in here, on radix we expect them to be zero.
@@ -569,11 +570,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
 	return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot) | _PAGE_PTE);
 }
 
-static inline unsigned long pte_pfn(pte_t pte)
-{
-	return (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT;
-}
-
 /* Generic modifiers for PTE bits */
 static inline pte_t pte_wrprotect(pte_t pte)
 {
@@ -9,13 +9,6 @@
 #endif
 
 #ifndef __ASSEMBLY__
-/* Insert a PTE, top-level function is out of line. It uses an inline
- * low level function in the respective pgtable-* files
- */
-extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
-		       pte_t pte);
-
-
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 				 pte_t *ptep, pte_t entry, int dirty);
@@ -36,7 +29,9 @@ void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t
  * corresponding HPTE into the hash table ahead of time, instead of
  * waiting for the inevitable extra hash-table miss exception.
  */
-static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_fault *vmf,
+		struct vm_area_struct *vma, unsigned long address,
+		pte_t *ptep, unsigned int nr)
 {
 	if (IS_ENABLED(CONFIG_PPC32) && !mmu_has_feature(MMU_FTR_HPTE_TABLE))
 		return;
@@ -35,13 +35,19 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end)
  * It just marks the page as not i-cache clean.  We do the i-cache
  * flush later when the page is given to a user process, if necessary.
  */
-static inline void flush_dcache_page(struct page *page)
+static inline void flush_dcache_folio(struct folio *folio)
 {
 	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
 		return;
 	/* avoid an atomic op if possible */
-	if (test_bit(PG_dcache_clean, &page->flags))
-		clear_bit(PG_dcache_clean, &page->flags);
+	if (test_bit(PG_dcache_clean, &folio->flags))
+		clear_bit(PG_dcache_clean, &folio->flags);
+}
+#define flush_dcache_folio flush_dcache_folio
+
+static inline void flush_dcache_page(struct page *page)
+{
+	flush_dcache_folio(page_folio(page));
 }
 
 void flush_icache_range(unsigned long start, unsigned long stop);
@@ -51,7 +57,7 @@ void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
 		unsigned long addr, int len);
 #define flush_icache_user_page flush_icache_user_page
 
-void flush_dcache_icache_page(struct page *page);
+void flush_dcache_icache_folio(struct folio *folio);
 
 /**
  * flush_dcache_range(): Write any modified data cache blocks out to memory and
@@ -894,7 +894,7 @@ void kvmppc_init_lpid(unsigned long nr_lpids);
 
 static inline void kvmppc_mmu_flush_icache(kvm_pfn_t pfn)
 {
-	struct page *page;
+	struct folio *folio;
 	/*
 	 * We can only access pages that the kernel maps
 	 * as memory. Bail out for unmapped ones.
@@ -903,10 +903,10 @@ static inline void kvmppc_mmu_flush_icache(kvm_pfn_t pfn)
 		return;
 
 	/* Clear i-cache for new pages */
-	page = pfn_to_page(pfn);
-	if (!test_bit(PG_dcache_clean, &page->flags)) {
-		flush_dcache_icache_page(page);
-		set_bit(PG_dcache_clean, &page->flags);
+	folio = page_folio(pfn_to_page(pfn));
+	if (!test_bit(PG_dcache_clean, &folio->flags)) {
+		flush_dcache_icache_folio(folio);
+		set_bit(PG_dcache_clean, &folio->flags);
 	}
 }
 
@@ -101,8 +101,6 @@ static inline bool pte_access_permitted(pte_t pte, bool write)
 static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) {
 	return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
 		     pgprot_val(pgprot)); }
-static inline unsigned long pte_pfn(pte_t pte)	{
-	return pte_val(pte) >> PTE_RPN_SHIFT; }
 
 /* Generic modifiers for PTE bits */
 static inline pte_t pte_exprotect(pte_t pte)
@@ -166,12 +164,6 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
 	return __pte(pte_val(pte) & ~_PAGE_SWP_EXCLUSIVE);
 }
 
-/* Insert a PTE, top-level function is out of line. It uses an inline
- * low level function in the respective pgtable-* files
- */
-extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
-		       pte_t pte);
-
 /* This low level function performs the actual PTE insertion
  * Setting the PTE depends on the MMU type and other factors. It's
  * an horrible mess that I'm not going to try to clean up now but
@@ -282,10 +274,12 @@ static inline int pud_huge(pud_t pud)
  * for the page which has just been mapped in.
  */
 #if defined(CONFIG_PPC_E500) && defined(CONFIG_HUGETLB_PAGE)
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
+void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep, unsigned int nr);
 #else
-static inline
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) {}
+static inline void update_mmu_cache_range(struct vm_fault *vmf,
+		struct vm_area_struct *vma, unsigned long address,
+		pte_t *ptep, unsigned int nr) {}
 #endif
 
 #endif /* __ASSEMBLY__ */
@@ -41,6 +41,12 @@ struct mm_struct;
 
 #ifndef __ASSEMBLY__
 
+void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+		pte_t pte, unsigned int nr);
+#define set_ptes set_ptes
+#define update_mmu_cache(vma, addr, ptep) \
+	update_mmu_cache_range(NULL, vma, addr, ptep, 1)
+
 #ifndef MAX_PTRS_PER_PGD
 #define MAX_PTRS_PER_PGD PTRS_PER_PGD
 #endif
@@ -48,6 +54,12 @@ struct mm_struct;
 /* Keep these as a macros to avoid include dependency mess */
 #define pte_page(x)		pfn_to_page(pte_pfn(x))
 #define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
+
+static inline unsigned long pte_pfn(pte_t pte)
+{
+	return (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT;
+}
+
 /*
  * Select all bits except the pfn
  */
@@ -1307,18 +1307,19 @@ void hash__early_init_mmu_secondary(void)
  */
 unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 {
-	struct page *page;
+	struct folio *folio;
 
 	if (!pfn_valid(pte_pfn(pte)))
 		return pp;
 
-	page = pte_page(pte);
+	folio = page_folio(pte_page(pte));
 
 	/* page is dirty */
-	if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) {
+	if (!test_bit(PG_dcache_clean, &folio->flags) &&
+	    !folio_test_reserved(folio)) {
 		if (trap == INTERRUPT_INST_STORAGE) {
-			flush_dcache_icache_page(page);
-			set_bit(PG_dcache_clean, &page->flags);
+			flush_dcache_icache_folio(folio);
+			set_bit(PG_dcache_clean, &folio->flags);
 		} else
 			pp |= HPTE_R_N;
 	}
@@ -148,44 +148,31 @@ static void __flush_dcache_icache(void *p)
 	invalidate_icache_range(addr, addr + PAGE_SIZE);
 }
 
-static void flush_dcache_icache_hugepage(struct page *page)
+void flush_dcache_icache_folio(struct folio *folio)
 {
-	int i;
-	int nr = compound_nr(page);
+	unsigned int i, nr = folio_nr_pages(folio);
 
-	if (!PageHighMem(page)) {
+	if (flush_coherent_icache())
+		return;
+
+	if (!folio_test_highmem(folio)) {
+		void *addr = folio_address(folio);
 		for (i = 0; i < nr; i++)
-			__flush_dcache_icache(lowmem_page_address(page + i));
-	} else {
+			__flush_dcache_icache(addr + i * PAGE_SIZE);
+	} else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
 		for (i = 0; i < nr; i++) {
-			void *start = kmap_local_page(page + i);
+			void *start = kmap_local_folio(folio, i * PAGE_SIZE);
 
 			__flush_dcache_icache(start);
 			kunmap_local(start);
 		}
-	}
-}
-
-void flush_dcache_icache_page(struct page *page)
-{
-	if (flush_coherent_icache())
-		return;
-
-	if (PageCompound(page))
-		return flush_dcache_icache_hugepage(page);
-
-	if (!PageHighMem(page)) {
-		__flush_dcache_icache(lowmem_page_address(page));
-	} else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
-		void *start = kmap_local_page(page);
-
-		__flush_dcache_icache(start);
-		kunmap_local(start);
 	} else {
-		flush_dcache_icache_phys(page_to_phys(page));
+		unsigned long pfn = folio_pfn(folio);
+		for (i = 0; i < nr; i++)
+			flush_dcache_icache_phys((pfn + i) * PAGE_SIZE);
 	}
 }
-EXPORT_SYMBOL(flush_dcache_icache_page);
+EXPORT_SYMBOL(flush_dcache_icache_folio);
 
 void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
 {
@@ -178,7 +178,8 @@ book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte)
  *
  * This must always be called with the pte lock held.
  */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep, unsigned int nr)
 {
 	if (is_vm_hugetlb_page(vma))
 		book3e_hugetlb_preload(vma, address, *ptep);
@@ -58,7 +58,7 @@ static inline int pte_looks_normal(pte_t pte)
 	return 0;
 }
 
-static struct page *maybe_pte_to_page(pte_t pte)
+static struct folio *maybe_pte_to_folio(pte_t pte)
 {
 	unsigned long pfn = pte_pfn(pte);
 	struct page *page;
@@ -68,7 +68,7 @@ static struct page *maybe_pte_to_page(pte_t pte)
 	page = pfn_to_page(pfn);
 	if (PageReserved(page))
 		return NULL;
-	return page;
+	return page_folio(page);
 }
 
 #ifdef CONFIG_PPC_BOOK3S
@@ -84,12 +84,12 @@ static pte_t set_pte_filter_hash(pte_t pte)
 	pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
 	if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
 				       cpu_has_feature(CPU_FTR_NOEXECUTE))) {
-		struct page *pg = maybe_pte_to_page(pte);
-		if (!pg)
+		struct folio *folio = maybe_pte_to_folio(pte);
+		if (!folio)
 			return pte;
-		if (!test_bit(PG_dcache_clean, &pg->flags)) {
-			flush_dcache_icache_page(pg);
-			set_bit(PG_dcache_clean, &pg->flags);
+		if (!test_bit(PG_dcache_clean, &folio->flags)) {
+			flush_dcache_icache_folio(folio);
+			set_bit(PG_dcache_clean, &folio->flags);
 		}
 	}
 	return pte;
@@ -107,7 +107,7 @@ static pte_t set_pte_filter_hash(pte_t pte) { return pte; }
  */
 static inline pte_t set_pte_filter(pte_t pte)
 {
-	struct page *pg;
+	struct folio *folio;
 
 	if (radix_enabled())
 		return pte;
@@ -120,18 +120,18 @@ static inline pte_t set_pte_filter(pte_t pte)
 		return pte;
 
 	/* If you set _PAGE_EXEC on weird pages you're on your own */
-	pg = maybe_pte_to_page(pte);
-	if (unlikely(!pg))
+	folio = maybe_pte_to_folio(pte);
+	if (unlikely(!folio))
 		return pte;
 
 	/* If the page clean, we move on */
-	if (test_bit(PG_dcache_clean, &pg->flags))
+	if (test_bit(PG_dcache_clean, &folio->flags))
 		return pte;
 
 	/* If it's an exec fault, we flush the cache and make it clean */
 	if (is_exec_fault()) {
-		flush_dcache_icache_page(pg);
-		set_bit(PG_dcache_clean, &pg->flags);
+		flush_dcache_icache_folio(folio);
+		set_bit(PG_dcache_clean, &folio->flags);
 		return pte;
 	}
 
@@ -142,7 +142,7 @@ static inline pte_t set_pte_filter(pte_t pte)
 static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
 				     int dirty)
 {
-	struct page *pg;
+	struct folio *folio;
 
 	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
 		return pte;
@@ -168,17 +168,17 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
 #endif /* CONFIG_DEBUG_VM */
 
 	/* If you set _PAGE_EXEC on weird pages you're on your own */
-	pg = maybe_pte_to_page(pte);
-	if (unlikely(!pg))
+	folio = maybe_pte_to_folio(pte);
+	if (unlikely(!folio))
 		goto bail;
 
 	/* If the page is already clean, we move on */
-	if (test_bit(PG_dcache_clean, &pg->flags))
+	if (test_bit(PG_dcache_clean, &folio->flags))
 		goto bail;
 
 	/* Clean the page and set PG_dcache_clean */
-	flush_dcache_icache_page(pg);
-	set_bit(PG_dcache_clean, &pg->flags);
+	flush_dcache_icache_folio(folio);
+	set_bit(PG_dcache_clean, &folio->flags);
 
  bail:
 	return pte_mkexec(pte);
@@ -187,8 +187,8 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
 /*
  * set_pte stores a linux PTE into the linux page table.
  */
-void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
-		pte_t pte)
+void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+		pte_t pte, unsigned int nr)
 {
 	/*
 	 * Make sure hardware valid bit is not set. We don't do
@@ -203,7 +203,16 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
 	pte = set_pte_filter(pte);
 
 	/* Perform the setting of the PTE */
-	__set_pte_at(mm, addr, ptep, pte, 0);
+	arch_enter_lazy_mmu_mode();
+	for (;;) {
+		__set_pte_at(mm, addr, ptep, pte, 0);
+		if (--nr == 0)
+			break;
+		ptep++;
+		pte = __pte(pte_val(pte) + (1UL << PTE_RPN_SHIFT));
+		addr += PAGE_SIZE;
+	}
+	arch_leave_lazy_mmu_mode();
 }
 
 void unmap_kernel_page(unsigned long va)