	x86/mm/dump_pagetables: Speed up page tables dump for CONFIG_KASAN=y
KASAN fills kernel page tables with repeated values to map several
TBs of virtual memory to a single kasan_zero_page:
  kasan_zero_p4d ->
      kasan_zero_pud ->
          kasan_zero_pmd ->
              kasan_zero_pte ->
                  kasan_zero_page
Walking the whole KASAN shadow range takes a lot of time, especially
with 5-level page tables. Since we already know that all kasan page tables
eventually point to the kasan_zero_page, we can call note_page() right away
and avoid walking the lower levels of the page tables.
This will not affect the output of the kernel_page_tables file, but it
lets us avoid spending time in the page table walkers:
Before:
  $ time cat /sys/kernel/debug/kernel_page_tables > /dev/null
  real    0m55.855s
  user    0m0.000s
  sys     0m55.840s
After:
  $ time cat /sys/kernel/debug/kernel_page_tables > /dev/null
  real    0m0.054s
  user    0m0.000s
  sys     0m0.054s
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170724152558.24689-1-aryabinin@virtuozzo.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
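The patch below short-circuits the dump at any table whose physical address matches one of the shared KASAN zero tables. As a rough, self-contained illustration of why that pays off (a user-space sketch only; the names ENTRIES, shared_lower, upper and walk_upper are made up and are not part of the patch): when every upper-level slot aliases one shared lower-level table, a walker that recognizes the shared table by identity records a single entry and skips the descent, instead of revisiting the same 512 slots for every slot above. The patch itself does the equivalent identity test in kasan_page_table(), comparing __pa(pt) against __pa(kasan_zero_pmd), __pa(kasan_zero_pud) and, under CONFIG_X86_5LEVEL, __pa(kasan_zero_p4d).

#include <stdio.h>

#define ENTRIES 512

/* One shared lower-level table; every upper-level slot points at it,
 * loosely analogous to shadow entries all pointing at kasan_zero_pmd. */
static long shared_lower[ENTRIES];

/* Upper-level table whose slots all alias shared_lower. */
static long *upper[ENTRIES];

static unsigned long visited;

/* Walk one lower-level table entry by entry. */
static void walk_lower(const long *table)
{
	for (int i = 0; i < ENTRIES; i++) {
		(void)table[i];
		visited++;		/* one note_page()-like record per entry */
	}
}

/* Walk the upper level; optionally short-circuit on the shared table. */
static void walk_upper(int skip_shared)
{
	visited = 0;
	for (int i = 0; i < ENTRIES; i++) {
		if (skip_shared && upper[i] == shared_lower) {
			visited++;	/* one record for the whole aliased range */
			continue;
		}
		walk_lower(upper[i]);
	}
}

int main(void)
{
	for (int i = 0; i < ENTRIES; i++)
		upper[i] = shared_lower;

	walk_upper(0);
	printf("full walk:       %lu entries visited\n", visited);
	walk_upper(1);
	printf("short-circuited: %lu entries visited\n", visited);
	return 0;
}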
			
			
parent 10af6235e0
commit 04b67022fb

1 changed file with 41 additions and 23 deletions
arch/x86/mm/dump_pagetables.c

@@ -13,12 +13,12 @@
  */
 
 #include <linux/debugfs.h>
+#include <linux/kasan.h>
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 
-#include <asm/kasan.h>
 #include <asm/pgtable.h>
 
 /*
@@ -302,23 +302,53 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
 		start++;
 	}
 }
+#ifdef CONFIG_KASAN
+
+/*
+ * This is an optimization for KASAN=y case. Since all kasan page tables
+ * eventually point to the kasan_zero_page we could call note_page()
+ * right away without walking through lower level page tables. This saves
+ * us dozens of seconds (minutes for 5-level config) while checking for
+ * W+X mapping or reading kernel_page_tables debugfs file.
+ */
+static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
+				void *pt)
+{
+	if (__pa(pt) == __pa(kasan_zero_pmd) ||
+#ifdef CONFIG_X86_5LEVEL
+	    __pa(pt) == __pa(kasan_zero_p4d) ||
+#endif
+	    __pa(pt) == __pa(kasan_zero_pud)) {
+		pgprotval_t prot = pte_flags(kasan_zero_pte[0]);
+		note_page(m, st, __pgprot(prot), 5);
+		return true;
+	}
+	return false;
+}
+#else
+static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
+				void *pt)
+{
+	return false;
+}
+#endif
 
 #if PTRS_PER_PMD > 1
 
 static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P)
 {
 	int i;
-	pmd_t *start;
+	pmd_t *start, *pmd_start;
 	pgprotval_t prot;
 
-	start = (pmd_t *)pud_page_vaddr(addr);
+	pmd_start = start = (pmd_t *)pud_page_vaddr(addr);
 	for (i = 0; i < PTRS_PER_PMD; i++) {
 		st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
 		if (!pmd_none(*start)) {
 			if (pmd_large(*start) || !pmd_present(*start)) {
 				prot = pmd_flags(*start);
 				note_page(m, st, __pgprot(prot), 4);
-			} else {
+			} else if (!kasan_page_table(m, st, pmd_start)) {
 				walk_pte_level(m, st, *start,
 					       P + i * PMD_LEVEL_MULT);
 			}
@@ -336,34 +366,22 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
 
 #if PTRS_PER_PUD > 1
 
-/*
- * This is an optimization for CONFIG_DEBUG_WX=y + CONFIG_KASAN=y
- * KASAN fills page tables with the same values. Since there is no
- * point in checking page table more than once we just skip repeated
- * entries. This saves us dozens of seconds during boot.
- */
-static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx)
-{
-	return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud));
-}
-
 static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P)
 {
 	int i;
-	pud_t *start;
+	pud_t *start, *pud_start;
 	pgprotval_t prot;
-	pud_t *prev_pud = NULL;
 
-	start = (pud_t *)p4d_page_vaddr(addr);
+	pud_start = start = (pud_t *)p4d_page_vaddr(addr);
 
 	for (i = 0; i < PTRS_PER_PUD; i++) {
 		st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
-		if (!pud_none(*start) &&
-		    !pud_already_checked(prev_pud, start, st->check_wx)) {
+		if (!pud_none(*start)) {
 			if (pud_large(*start) || !pud_present(*start)) {
 				prot = pud_flags(*start);
 				note_page(m, st, __pgprot(prot), 3);
-			} else {
+			} else if (!kasan_page_table(m, st, pud_start)) {
 				walk_pmd_level(m, st, *start,
 					       P + i * PUD_LEVEL_MULT);
 			}
@@ -386,10 +404,10 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
 static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
 {
 	int i;
-	p4d_t *start;
+	p4d_t *start, *p4d_start;
 	pgprotval_t prot;
 
-	start = (p4d_t *)pgd_page_vaddr(addr);
+	p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);
 
 	for (i = 0; i < PTRS_PER_P4D; i++) {
 		st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT);
@@ -397,7 +415,7 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 			if (p4d_large(*start) || !p4d_present(*start)) {
 				prot = p4d_flags(*start);
 				note_page(m, st, __pgprot(prot), 2);
-			} else {
+			} else if (!kasan_page_table(m, st, p4d_start)) {
 				walk_pud_level(m, st, *start,
 					       P + i * P4D_LEVEL_MULT);
 			}