mirror of https://github.com/torvalds/linux.git, synced 2025-11-02 01:29:02 +02:00
The ISA suggests ptesync after setting a pte, to prevent a table walk initiated by a subsequent access from missing that store and causing a spurious fault. This is an architectural allowance that permits an implementation's page table walker to be incoherent with the store queue.

However, there is no correctness problem in taking a spurious fault in userspace -- the kernel copes with these at any time, so the updated pte will be found eventually. Spurious kernel faults on vmap memory must be avoided, so a ptesync is put into flush_cache_vmap.

On POWER9, so far I have not found a measurable window where this can result in more minor faults, so as an optimisation, remove the costly ptesync from pte updates. If an implementation benefits from ptesync, it would be better to add it back in update_mmu_cache, so it is not done for things like fork(2).

A fork --fork --exec benchmark improved 5.2% (12400 -> 13100).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
127 lines
4.2 KiB
C
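For illustration only (not part of this patch or of the header below): the commit message's suggestion of moving the barrier into update_mmu_cache() could look roughly like the sketch that follows. The feature bit is invented purely as a placeholder; no such bit exists in the kernel.

/*
 * Hypothetical sketch: an implementation found to benefit from the barrier
 * would pay for the ptesync only on the fault path, instead of after every
 * pte update during fork()/mmap()/mprotect().
 * CPU_FTR_HYPOTHETICAL_PTESYNC is an invented placeholder feature bit.
 */
static inline void update_mmu_cache(struct vm_area_struct *vma,
				    unsigned long address, pte_t *ptep)
{
	if (cpu_has_feature(CPU_FTR_HYPOTHETICAL_PTESYNC))
		asm volatile("ptesync" ::: "memory");
}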
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#ifndef _ASM_POWERPC_CACHEFLUSH_H
#define _ASM_POWERPC_CACHEFLUSH_H

#ifdef __KERNEL__

#include <linux/mm.h>
#include <asm/cputable.h>
#include <asm/cpu_has_feature.h>

/*
 * No cache flushing is required when address mappings are changed,
 * because the caches on PowerPCs are physically addressed.
 */
#define flush_cache_all()			do { } while (0)
#define flush_cache_mm(mm)			do { } while (0)
#define flush_cache_dup_mm(mm)			do { } while (0)
#define flush_cache_range(vma, start, end)	do { } while (0)
#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
#define flush_icache_page(vma, page)		do { } while (0)
#define flush_cache_vunmap(start, end)		do { } while (0)

#ifdef CONFIG_PPC_BOOK3S_64
/*
 * Book3s has no ptesync after setting a pte, so without this ptesync it's
 * possible for a kernel virtual mapping access to return a spurious fault
 * if it's accessed right after the pte is set. The page fault handler does
 * not expect this type of fault. flush_cache_vmap is not exactly the right
 * place to put this, but it seems to work well enough.
 */
#define flush_cache_vmap(start, end)	do { asm volatile("ptesync" ::: "memory"); } while (0)
#else
#define flush_cache_vmap(start, end)	do { } while (0)
#endif

#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *page);
#define flush_dcache_mmap_lock(mapping)		do { } while (0)
#define flush_dcache_mmap_unlock(mapping)	do { } while (0)

extern void flush_icache_range(unsigned long, unsigned long);
extern void flush_icache_user_range(struct vm_area_struct *vma,
				    struct page *page, unsigned long addr,
				    int len);
extern void __flush_dcache_icache(void *page_va);
extern void flush_dcache_icache_page(struct page *page);
#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE)
extern void __flush_dcache_icache_phys(unsigned long physaddr);
#else
static inline void __flush_dcache_icache_phys(unsigned long physaddr)
{
	BUG();
}
#endif

#ifdef CONFIG_PPC32
/*
 * Write any modified data cache blocks out to memory and invalidate them.
 * Does not invalidate the corresponding instruction cache blocks.
 */
static inline void flush_dcache_range(unsigned long start, unsigned long stop)
{
	void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
	unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
	unsigned long i;

	for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
		dcbf(addr);
	mb();	/* sync */
}

/*
 * Write any modified data cache blocks out to memory.
 * Does not invalidate the corresponding cache lines (especially for
 * any corresponding instruction cache).
 */
static inline void clean_dcache_range(unsigned long start, unsigned long stop)
{
	void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
	unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
	unsigned long i;

	for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
		dcbst(addr);
	mb();	/* sync */
}

/*
 * Like above, but invalidate the D-cache. This is used by the 8xx
 * to invalidate the cache so the PPC core doesn't get stale data
 * from the CPM (no cache snooping here :-).
 */
static inline void invalidate_dcache_range(unsigned long start,
					   unsigned long stop)
{
	void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
	unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
	unsigned long i;

	for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
		dcbi(addr);
	mb();	/* sync */
}

#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PPC64
extern void flush_dcache_range(unsigned long start, unsigned long stop);
extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
#endif

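/*
 * copy_to_user_page() is used when the kernel writes into another process's
 * page through the kernel mapping (e.g. ptrace poking a breakpoint into
 * text via access_process_vm()): after the memcpy, the icache for that user
 * page must be made coherent with the modified data, hence the
 * flush_icache_user_range(). The read direction needs no extra flushing.
 */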
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
	do { \
		memcpy(dst, src, len); \
		flush_icache_user_range(vma, page, vaddr, len); \
	} while (0)
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
	memcpy(dst, src, len)

#endif /* __KERNEL__ */

#endif /* _ASM_POWERPC_CACHEFLUSH_H */
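For context, a hedged usage sketch of the icache-coherency helper declared above. The function name and surrounding code are invented for illustration; only flush_icache_range() comes from this header.

/*
 * Illustration only: after writing instructions into memory that will be
 * executed, the dcache must be flushed out and the icache invalidated over
 * the written range before jumping to the new code.
 */
static void publish_code(void *dst, const void *src, unsigned long len)
{
	memcpy(dst, src, len);	/* new instructions now sit in the dcache */
	flush_icache_range((unsigned long)dst, (unsigned long)dst + len);
}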