s390/mm: implement 5 level page tables

Add the logic to upgrade the page table for a 64-bit process to five
levels. This increases the TASK_SIZE from 8PB to 16EB-4K.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

commit 1aea9b3f92
parent 16ddcc34b8

15 changed files with 289 additions and 72 deletions
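For orientation before the per-file hunks: with five levels the walk from a virtual address to its page table entry goes pgd -> p4d -> pud -> pmd -> pte, using the offset helpers this patch adds for s390 (compare the walk in __kernel_map_pages() further down in the diff). The sketch below is illustrative only, not code from this commit; lookup_kernel_pte() is a made-up helper name, and it assumes normal kernel context with the usual page-table headers.

/*
 * Illustrative sketch, not part of this commit: resolving a kernel
 * virtual address once all five levels are in place. The offset/none
 * helpers are the ones this patch extends for s390.
 */
static pte_t *lookup_kernel_pte(unsigned long address)
{
	pgd_t *pgd = pgd_offset_k(address);	/* region-first table */
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd))
		return NULL;
	p4d = p4d_offset(pgd, address);		/* region-second table */
	if (p4d_none(*p4d))
		return NULL;
	pud = pud_offset(p4d, address);		/* region-third table */
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, address);		/* segment table */
	if (pmd_none(*pmd))
		return NULL;
	return pte_offset_kernel(pmd, address);	/* page table entry */
}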
		|  | @ -184,7 +184,7 @@ config SCHED_OMIT_FRAME_POINTER | |||
| 
 | ||||
| config PGTABLE_LEVELS | ||||
| 	int | ||||
| 	default 4 | ||||
| 	default 5 | ||||
| 
 | ||||
| source "init/Kconfig" | ||||
| 
 | ||||
|  |  | |||
|  | @ -74,6 +74,7 @@ typedef struct { unsigned long pgste; } pgste_t; | |||
| typedef struct { unsigned long pte; } pte_t; | ||||
| typedef struct { unsigned long pmd; } pmd_t; | ||||
| typedef struct { unsigned long pud; } pud_t; | ||||
| typedef struct { unsigned long p4d; } p4d_t; | ||||
| typedef struct { unsigned long pgd; } pgd_t; | ||||
| typedef pte_t *pgtable_t; | ||||
| 
 | ||||
|  | @ -82,12 +83,14 @@ typedef pte_t *pgtable_t; | |||
| #define pte_val(x)	((x).pte) | ||||
| #define pmd_val(x)	((x).pmd) | ||||
| #define pud_val(x)	((x).pud) | ||||
| #define p4d_val(x)	((x).p4d) | ||||
| #define pgd_val(x)      ((x).pgd) | ||||
| 
 | ||||
| #define __pgste(x)	((pgste_t) { (x) } ) | ||||
| #define __pte(x)        ((pte_t) { (x) } ) | ||||
| #define __pmd(x)        ((pmd_t) { (x) } ) | ||||
| #define __pud(x)	((pud_t) { (x) } ) | ||||
| #define __p4d(x)	((p4d_t) { (x) } ) | ||||
| #define __pgd(x)        ((pgd_t) { (x) } ) | ||||
| #define __pgprot(x)     ((pgprot_t) { (x) } ) | ||||
| 
 | ||||
|  |  | |||
|  | @ -51,12 +51,24 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) | |||
| 		return _SEGMENT_ENTRY_EMPTY; | ||||
| 	if (mm->context.asce_limit <= (1UL << 42)) | ||||
| 		return _REGION3_ENTRY_EMPTY; | ||||
| 	return _REGION2_ENTRY_EMPTY; | ||||
| 	if (mm->context.asce_limit <= (1UL << 53)) | ||||
| 		return _REGION2_ENTRY_EMPTY; | ||||
| 	return _REGION1_ENTRY_EMPTY; | ||||
| } | ||||
| 
 | ||||
| int crst_table_upgrade(struct mm_struct *); | ||||
| int crst_table_upgrade(struct mm_struct *mm, unsigned long limit); | ||||
| void crst_table_downgrade(struct mm_struct *); | ||||
| 
 | ||||
| static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address) | ||||
| { | ||||
| 	unsigned long *table = crst_table_alloc(mm); | ||||
| 
 | ||||
| 	if (table) | ||||
| 		crst_table_init(table, _REGION2_ENTRY_EMPTY); | ||||
| 	return (p4d_t *) table; | ||||
| } | ||||
| #define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d) | ||||
| 
 | ||||
| static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) | ||||
| { | ||||
| 	unsigned long *table = crst_table_alloc(mm); | ||||
|  | @ -86,9 +98,14 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) | |||
| 	crst_table_free(mm, (unsigned long *) pmd); | ||||
| } | ||||
| 
 | ||||
| static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) | ||||
| static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) | ||||
| { | ||||
| 	pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud); | ||||
| 	pgd_val(*pgd) = _REGION1_ENTRY | __pa(p4d); | ||||
| } | ||||
| 
 | ||||
| static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) | ||||
| { | ||||
| 	p4d_val(*p4d) = _REGION2_ENTRY | __pa(pud); | ||||
| } | ||||
| 
 | ||||
| static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) | ||||
|  |  | |||
|  | @ -24,7 +24,6 @@ | |||
|  * the S390 page table tree. | ||||
|  */ | ||||
| #ifndef __ASSEMBLY__ | ||||
| #include <asm-generic/5level-fixup.h> | ||||
| #include <linux/sched.h> | ||||
| #include <linux/mm_types.h> | ||||
| #include <linux/page-flags.h> | ||||
|  | @ -87,12 +86,15 @@ extern unsigned long zero_page_mask; | |||
|  */ | ||||
| #define PMD_SHIFT	20 | ||||
| #define PUD_SHIFT	31 | ||||
| #define PGDIR_SHIFT	42 | ||||
| #define P4D_SHIFT	42 | ||||
| #define PGDIR_SHIFT	53 | ||||
| 
 | ||||
| #define PMD_SIZE        (1UL << PMD_SHIFT) | ||||
| #define PMD_MASK        (~(PMD_SIZE-1)) | ||||
| #define PUD_SIZE	(1UL << PUD_SHIFT) | ||||
| #define PUD_MASK	(~(PUD_SIZE-1)) | ||||
| #define P4D_SIZE	(1UL << P4D_SHIFT) | ||||
| #define P4D_MASK	(~(P4D_SIZE-1)) | ||||
| #define PGDIR_SIZE	(1UL << PGDIR_SHIFT) | ||||
| #define PGDIR_MASK	(~(PGDIR_SIZE-1)) | ||||
| 
 | ||||
|  | @ -105,6 +107,7 @@ extern unsigned long zero_page_mask; | |||
| #define PTRS_PER_PTE	256 | ||||
| #define PTRS_PER_PMD	2048 | ||||
| #define PTRS_PER_PUD	2048 | ||||
| #define PTRS_PER_P4D	2048 | ||||
| #define PTRS_PER_PGD	2048 | ||||
| 
 | ||||
| #define FIRST_USER_ADDRESS  0UL | ||||
|  | @ -115,6 +118,8 @@ extern unsigned long zero_page_mask; | |||
| 	printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e)) | ||||
| #define pud_ERROR(e) \ | ||||
| 	printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e)) | ||||
| #define p4d_ERROR(e) \ | ||||
| 	printk("%s:%d: bad p4d %p.\n", __FILE__, __LINE__, (void *) p4d_val(e)) | ||||
| #define pgd_ERROR(e) \ | ||||
| 	printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e)) | ||||
| 
 | ||||
|  | @ -310,8 +315,8 @@ static inline int is_module_addr(void *addr) | |||
| #define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */ | ||||
| #endif | ||||
| 
 | ||||
| #define _REGION_ENTRY_BITS	 0xfffffffffffff227UL | ||||
| #define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe27UL | ||||
| #define _REGION_ENTRY_BITS	 0xfffffffffffff22fUL | ||||
| #define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL | ||||
| 
 | ||||
| /* Bits in the segment table entry */ | ||||
| #define _SEGMENT_ENTRY_BITS	0xfffffffffffffe33UL | ||||
|  | @ -564,14 +569,14 @@ static inline void crdte(unsigned long old, unsigned long new, | |||
|  */ | ||||
| static inline int pgd_present(pgd_t pgd) | ||||
| { | ||||
| 	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) | ||||
| 	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1) | ||||
| 		return 1; | ||||
| 	return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL; | ||||
| } | ||||
| 
 | ||||
| static inline int pgd_none(pgd_t pgd) | ||||
| { | ||||
| 	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) | ||||
| 	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1) | ||||
| 		return 0; | ||||
| 	return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL; | ||||
| } | ||||
|  | @ -589,6 +594,28 @@ static inline int pgd_bad(pgd_t pgd) | |||
| 	return (pgd_val(pgd) & mask) != 0; | ||||
| } | ||||
| 
 | ||||
| static inline int p4d_present(p4d_t p4d) | ||||
| { | ||||
| 	if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) | ||||
| 		return 1; | ||||
| 	return (p4d_val(p4d) & _REGION_ENTRY_ORIGIN) != 0UL; | ||||
| } | ||||
| 
 | ||||
| static inline int p4d_none(p4d_t p4d) | ||||
| { | ||||
| 	if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) | ||||
| 		return 0; | ||||
| 	return p4d_val(p4d) == _REGION2_ENTRY_EMPTY; | ||||
| } | ||||
| 
 | ||||
| static inline unsigned long p4d_pfn(p4d_t p4d) | ||||
| { | ||||
| 	unsigned long origin_mask; | ||||
| 
 | ||||
| 	origin_mask = _REGION_ENTRY_ORIGIN; | ||||
| 	return (p4d_val(p4d) & origin_mask) >> PAGE_SHIFT; | ||||
| } | ||||
| 
 | ||||
| static inline int pud_present(pud_t pud) | ||||
| { | ||||
| 	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) | ||||
|  | @ -641,6 +668,13 @@ static inline int pud_bad(pud_t pud) | |||
| 	return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0; | ||||
| } | ||||
| 
 | ||||
| static inline int p4d_bad(p4d_t p4d) | ||||
| { | ||||
| 	if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) | ||||
| 		return pud_bad(__pud(p4d_val(p4d))); | ||||
| 	return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0; | ||||
| } | ||||
| 
 | ||||
| static inline int pmd_present(pmd_t pmd) | ||||
| { | ||||
| 	return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY; | ||||
|  | @ -794,8 +828,14 @@ static inline int pte_unused(pte_t pte) | |||
| 
 | ||||
| static inline void pgd_clear(pgd_t *pgd) | ||||
| { | ||||
| 	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) | ||||
| 		pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; | ||||
| 	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) | ||||
| 		pgd_val(*pgd) = _REGION1_ENTRY_EMPTY; | ||||
| } | ||||
| 
 | ||||
| static inline void p4d_clear(p4d_t *p4d) | ||||
| { | ||||
| 	if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) | ||||
| 		p4d_val(*p4d) = _REGION2_ENTRY_EMPTY; | ||||
| } | ||||
| 
 | ||||
| static inline void pud_clear(pud_t *pud) | ||||
|  | @ -1089,6 +1129,7 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) | |||
| } | ||||
| 
 | ||||
| #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) | ||||
| #define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1)) | ||||
| #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) | ||||
| #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) | ||||
| #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) | ||||
|  | @ -1098,19 +1139,31 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) | |||
| 
 | ||||
| #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) | ||||
| #define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) | ||||
| #define p4d_deref(p4d) (p4d_val(p4d) & _REGION_ENTRY_ORIGIN) | ||||
| #define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) | ||||
| 
 | ||||
| static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) | ||||
| static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) | ||||
| { | ||||
| 	pud_t *pud = (pud_t *) pgd; | ||||
| 	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) | ||||
| 		pud = (pud_t *) pgd_deref(*pgd); | ||||
| 	return pud  + pud_index(address); | ||||
| 	p4d_t *p4d = (p4d_t *) pgd; | ||||
| 
 | ||||
| 	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) | ||||
| 		p4d = (p4d_t *) pgd_deref(*pgd); | ||||
| 	return p4d + p4d_index(address); | ||||
| } | ||||
| 
 | ||||
| static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) | ||||
| { | ||||
| 	pud_t *pud = (pud_t *) p4d; | ||||
| 
 | ||||
| 	if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) | ||||
| 		pud = (pud_t *) p4d_deref(*p4d); | ||||
| 	return pud + pud_index(address); | ||||
| } | ||||
| 
 | ||||
| static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) | ||||
| { | ||||
| 	pmd_t *pmd = (pmd_t *) pud; | ||||
| 
 | ||||
| 	if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) | ||||
| 		pmd = (pmd_t *) pud_deref(*pud); | ||||
| 	return pmd + pmd_index(address); | ||||
|  | @ -1122,6 +1175,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) | |||
| 
 | ||||
| #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) | ||||
| #define pud_page(pud) pfn_to_page(pud_pfn(pud)) | ||||
| #define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d)) | ||||
| 
 | ||||
| /* Find an entry in the lowest level page table.. */ | ||||
| #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) | ||||
|  |  | |||
|  | @ -92,11 +92,11 @@ extern void execve_tail(void); | |||
|  */ | ||||
| 
 | ||||
| #define TASK_SIZE_OF(tsk)	(test_tsk_thread_flag(tsk, TIF_31BIT) ? \ | ||||
| 					(1UL << 31) : (1UL << 53)) | ||||
| 					(1UL << 31) : -PAGE_SIZE) | ||||
| #define TASK_UNMAPPED_BASE	(test_thread_flag(TIF_31BIT) ? \ | ||||
| 					(1UL << 30) : (1UL << 41)) | ||||
| #define TASK_SIZE		TASK_SIZE_OF(current) | ||||
| #define TASK_SIZE_MAX		(1UL << 53) | ||||
| #define TASK_SIZE_MAX		(-PAGE_SIZE) | ||||
| 
 | ||||
| #define STACK_TOP		(test_thread_flag(TIF_31BIT) ? \ | ||||
| 					(1UL << 31) : (1UL << 42)) | ||||
|  |  | |||
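An aside on the TASK_SIZE_MAX change above: evaluated as an unsigned long, -PAGE_SIZE wraps to 2^64 - 4096, which is where the commit message's jump from 8PB (1UL << 53) to 16EB-4K comes from. A minimal user-space check of the arithmetic, not part of the patch:

#include <stdio.h>

/*
 * Illustration only: why TASK_SIZE_MAX == -PAGE_SIZE corresponds to
 * "16EB-4K" with 4K pages, and why the old limit was 8PB.
 */
int main(void)
{
	unsigned long page_size = 1UL << 12;		/* 4K pages */
	unsigned long old_max = 1UL << 53;		/* previous limit: 8PB */
	unsigned long new_max = 0UL - page_size;	/* -PAGE_SIZE wraps to 2^64 - 4K */

	printf("old TASK_SIZE_MAX: %lu PB\n", old_max >> 50);	/* prints 8 */
	printf("new TASK_SIZE_MAX: 0x%lx\n", new_max);		/* prints 0xfffffffffffff000 */
	return 0;
}

Compiled with any C compiler this prints 8 PB for the old limit and 0xfffffffffffff000 (16EB minus one 4K page) for the new one.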
|  | @ -136,6 +136,21 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, | |||
| 	tlb_remove_table(tlb, pmd); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * p4d_free_tlb frees a p4d table and clears the CRSTE for the | ||||
|  * region second table entry from the tlb. | ||||
|  * If the mm uses a four level page table the single p4d is freed | ||||
|  * as the pgd. p4d_free_tlb checks the asce_limit against 8PB | ||||
|  * to avoid the double free of the p4d in this case. | ||||
|  */ | ||||
| static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, | ||||
| 				unsigned long address) | ||||
| { | ||||
| 	if (tlb->mm->context.asce_limit <= (1UL << 53)) | ||||
| 		return; | ||||
| 	tlb_remove_table(tlb, p4d); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * pud_free_tlb frees a pud table and clears the CRSTE for the | ||||
|  * region third table entry from the tlb. | ||||
|  |  | |||
|  | @ -149,7 +149,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, | |||
| } | ||||
| 
 | ||||
| static void walk_pud_level(struct seq_file *m, struct pg_state *st, | ||||
| 			   pgd_t *pgd, unsigned long addr) | ||||
| 			   p4d_t *p4d, unsigned long addr) | ||||
| { | ||||
| 	unsigned int prot; | ||||
| 	pud_t *pud; | ||||
|  | @ -157,7 +157,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, | |||
| 
 | ||||
| 	for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) { | ||||
| 		st->current_address = addr; | ||||
| 		pud = pud_offset(pgd, addr); | ||||
| 		pud = pud_offset(p4d, addr); | ||||
| 		if (!pud_none(*pud)) | ||||
| 			if (pud_large(*pud)) { | ||||
| 				prot = pud_val(*pud) & | ||||
|  | @ -172,6 +172,23 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void walk_p4d_level(struct seq_file *m, struct pg_state *st, | ||||
| 			   pgd_t *pgd, unsigned long addr) | ||||
| { | ||||
| 	p4d_t *p4d; | ||||
| 	int i; | ||||
| 
 | ||||
| 	for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++) { | ||||
| 		st->current_address = addr; | ||||
| 		p4d = p4d_offset(pgd, addr); | ||||
| 		if (!p4d_none(*p4d)) | ||||
| 			walk_pud_level(m, st, p4d, addr); | ||||
| 		else | ||||
| 			note_page(m, st, _PAGE_INVALID, 2); | ||||
| 		addr += P4D_SIZE; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void walk_pgd_level(struct seq_file *m) | ||||
| { | ||||
| 	unsigned long addr = 0; | ||||
|  | @ -184,7 +201,7 @@ static void walk_pgd_level(struct seq_file *m) | |||
| 		st.current_address = addr; | ||||
| 		pgd = pgd_offset_k(addr); | ||||
| 		if (!pgd_none(*pgd)) | ||||
| 			walk_pud_level(m, &st, pgd, addr); | ||||
| 			walk_p4d_level(m, &st, pgd, addr); | ||||
| 		else | ||||
| 			note_page(m, &st, _PAGE_INVALID, 1); | ||||
| 		addr += PGDIR_SIZE; | ||||
|  |  | |||
|  | @ -537,6 +537,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) | |||
| 	unsigned long *table; | ||||
| 	spinlock_t *ptl; | ||||
| 	pgd_t *pgd; | ||||
| 	p4d_t *p4d; | ||||
| 	pud_t *pud; | ||||
| 	pmd_t *pmd; | ||||
| 	int rc; | ||||
|  | @ -573,7 +574,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) | |||
| 	mm = gmap->mm; | ||||
| 	pgd = pgd_offset(mm, vmaddr); | ||||
| 	VM_BUG_ON(pgd_none(*pgd)); | ||||
| 	pud = pud_offset(pgd, vmaddr); | ||||
| 	p4d = p4d_offset(pgd, vmaddr); | ||||
| 	VM_BUG_ON(p4d_none(*p4d)); | ||||
| 	pud = pud_offset(p4d, vmaddr); | ||||
| 	VM_BUG_ON(pud_none(*pud)); | ||||
| 	/* large puds cannot yet be handled */ | ||||
| 	if (pud_large(*pud)) | ||||
|  |  | |||
|  | @ -166,15 +166,15 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, | |||
| 	return 1; | ||||
| } | ||||
| 
 | ||||
| static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, | ||||
| static inline int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, | ||||
| 		unsigned long end, int write, struct page **pages, int *nr) | ||||
| { | ||||
| 	unsigned long next; | ||||
| 	pud_t *pudp, pud; | ||||
| 
 | ||||
| 	pudp = (pud_t *) pgdp; | ||||
| 	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) | ||||
| 		pudp = (pud_t *) pgd_deref(pgd); | ||||
| 	pudp = (pud_t *) p4dp; | ||||
| 	if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) | ||||
| 		pudp = (pud_t *) p4d_deref(p4d); | ||||
| 	pudp += pud_index(addr); | ||||
| 	do { | ||||
| 		pud = *pudp; | ||||
|  | @ -194,6 +194,29 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, | |||
| 	return 1; | ||||
| } | ||||
| 
 | ||||
| static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, | ||||
| 		unsigned long end, int write, struct page **pages, int *nr) | ||||
| { | ||||
| 	unsigned long next; | ||||
| 	p4d_t *p4dp, p4d; | ||||
| 
 | ||||
| 	p4dp = (p4d_t *) pgdp; | ||||
| 	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) | ||||
| 		p4dp = (p4d_t *) pgd_deref(pgd); | ||||
| 	p4dp += p4d_index(addr); | ||||
| 	do { | ||||
| 		p4d = *p4dp; | ||||
| 		barrier(); | ||||
| 		next = p4d_addr_end(addr, end); | ||||
| 		if (p4d_none(p4d)) | ||||
| 			return 0; | ||||
| 		if (!gup_pud_range(p4dp, p4d, addr, next, write, pages, nr)) | ||||
| 			return 0; | ||||
| 	} while (p4dp++, addr = next, addr != end); | ||||
| 
 | ||||
| 	return 1; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall | ||||
|  * back to the regular GUP. | ||||
|  | @ -228,7 +251,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, | |||
| 		next = pgd_addr_end(addr, end); | ||||
| 		if (pgd_none(pgd)) | ||||
| 			break; | ||||
| 		if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr)) | ||||
| 		if (!gup_p4d_range(pgdp, pgd, addr, next, write, pages, &nr)) | ||||
| 			break; | ||||
| 	} while (pgdp++, addr = next, addr != end); | ||||
| 	local_irq_restore(flags); | ||||
|  |  | |||
|  | @ -162,16 +162,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, | |||
| 			unsigned long addr, unsigned long sz) | ||||
| { | ||||
| 	pgd_t *pgdp; | ||||
| 	p4d_t *p4dp; | ||||
| 	pud_t *pudp; | ||||
| 	pmd_t *pmdp = NULL; | ||||
| 
 | ||||
| 	pgdp = pgd_offset(mm, addr); | ||||
| 	pudp = pud_alloc(mm, pgdp, addr); | ||||
| 	if (pudp) { | ||||
| 		if (sz == PUD_SIZE) | ||||
| 			return (pte_t *) pudp; | ||||
| 		else if (sz == PMD_SIZE) | ||||
| 			pmdp = pmd_alloc(mm, pudp, addr); | ||||
| 	p4dp = p4d_alloc(mm, pgdp, addr); | ||||
| 	if (p4dp) { | ||||
| 		pudp = pud_alloc(mm, p4dp, addr); | ||||
| 		if (pudp) { | ||||
| 			if (sz == PUD_SIZE) | ||||
| 				return (pte_t *) pudp; | ||||
| 			else if (sz == PMD_SIZE) | ||||
| 				pmdp = pmd_alloc(mm, pudp, addr); | ||||
| 		} | ||||
| 	} | ||||
| 	return (pte_t *) pmdp; | ||||
| } | ||||
|  | @ -179,16 +183,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, | |||
| pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | ||||
| { | ||||
| 	pgd_t *pgdp; | ||||
| 	p4d_t *p4dp; | ||||
| 	pud_t *pudp; | ||||
| 	pmd_t *pmdp = NULL; | ||||
| 
 | ||||
| 	pgdp = pgd_offset(mm, addr); | ||||
| 	if (pgd_present(*pgdp)) { | ||||
| 		pudp = pud_offset(pgdp, addr); | ||||
| 		if (pud_present(*pudp)) { | ||||
| 			if (pud_large(*pudp)) | ||||
| 				return (pte_t *) pudp; | ||||
| 			pmdp = pmd_offset(pudp, addr); | ||||
| 		p4dp = p4d_offset(pgdp, addr); | ||||
| 		if (p4d_present(*p4dp)) { | ||||
| 			pudp = pud_offset(p4dp, addr); | ||||
| 			if (pud_present(*pudp)) { | ||||
| 				if (pud_large(*pudp)) | ||||
| 					return (pte_t *) pudp; | ||||
| 				pmdp = pmd_offset(pudp, addr); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return (pte_t *) pmdp; | ||||
|  |  | |||
|  | @ -120,7 +120,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
| 
 | ||||
| check_asce_limit: | ||||
| 	if (addr + len > current->mm->context.asce_limit) { | ||||
| 		rc = crst_table_upgrade(mm); | ||||
| 		rc = crst_table_upgrade(mm, addr + len); | ||||
| 		if (rc) | ||||
| 			return (unsigned long) rc; | ||||
| 	} | ||||
|  | @ -184,7 +184,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
| 
 | ||||
| check_asce_limit: | ||||
| 	if (addr + len > current->mm->context.asce_limit) { | ||||
| 		rc = crst_table_upgrade(mm); | ||||
| 		rc = crst_table_upgrade(mm, addr + len); | ||||
| 		if (rc) | ||||
| 			return (unsigned long) rc; | ||||
| 	} | ||||
|  |  | |||
|  | @ -229,14 +229,14 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr, | |||
| 	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); | ||||
| } | ||||
| 
 | ||||
| static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end, | ||||
| static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end, | ||||
| 			  unsigned long flags) | ||||
| { | ||||
| 	unsigned long next; | ||||
| 	pud_t *pudp; | ||||
| 	int rc = 0; | ||||
| 
 | ||||
| 	pudp = pud_offset(pgd, addr); | ||||
| 	pudp = pud_offset(p4d, addr); | ||||
| 	do { | ||||
| 		if (pud_none(*pudp)) | ||||
| 			return -EINVAL; | ||||
|  | @ -259,6 +259,26 @@ static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end, | |||
| 	return rc; | ||||
| } | ||||
| 
 | ||||
| static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end, | ||||
| 			  unsigned long flags) | ||||
| { | ||||
| 	unsigned long next; | ||||
| 	p4d_t *p4dp; | ||||
| 	int rc = 0; | ||||
| 
 | ||||
| 	p4dp = p4d_offset(pgd, addr); | ||||
| 	do { | ||||
| 		if (p4d_none(*p4dp)) | ||||
| 			return -EINVAL; | ||||
| 		next = p4d_addr_end(addr, end); | ||||
| 		rc = walk_pud_level(p4dp, addr, next, flags); | ||||
| 		p4dp++; | ||||
| 		addr = next; | ||||
| 		cond_resched(); | ||||
| 	} while (addr < end && !rc); | ||||
| 	return rc; | ||||
| } | ||||
| 
 | ||||
| static DEFINE_MUTEX(cpa_mutex); | ||||
| 
 | ||||
| static int change_page_attr(unsigned long addr, unsigned long end, | ||||
|  | @ -278,7 +298,7 @@ static int change_page_attr(unsigned long addr, unsigned long end, | |||
| 		if (pgd_none(*pgdp)) | ||||
| 			break; | ||||
| 		next = pgd_addr_end(addr, end); | ||||
| 		rc = walk_pud_level(pgdp, addr, next, flags); | ||||
| 		rc = walk_p4d_level(pgdp, addr, next, flags); | ||||
| 		if (rc) | ||||
| 			break; | ||||
| 		cond_resched(); | ||||
|  | @ -319,6 +339,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) | |||
| 	unsigned long address; | ||||
| 	int nr, i, j; | ||||
| 	pgd_t *pgd; | ||||
| 	p4d_t *p4d; | ||||
| 	pud_t *pud; | ||||
| 	pmd_t *pmd; | ||||
| 	pte_t *pte; | ||||
|  | @ -326,7 +347,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) | |||
| 	for (i = 0; i < numpages;) { | ||||
| 		address = page_to_phys(page + i); | ||||
| 		pgd = pgd_offset_k(address); | ||||
| 		pud = pud_offset(pgd, address); | ||||
| 		p4d = p4d_offset(pgd, address); | ||||
| 		pud = pud_offset(p4d, address); | ||||
| 		pmd = pmd_offset(pud, address); | ||||
| 		pte = pte_offset_kernel(pmd, address); | ||||
| 		nr = (unsigned long)pte >> ilog2(sizeof(long)); | ||||
|  |  | |||
|  | @ -76,29 +76,46 @@ static void __crst_table_upgrade(void *arg) | |||
| 	__tlb_flush_local(); | ||||
| } | ||||
| 
 | ||||
| int crst_table_upgrade(struct mm_struct *mm) | ||||
| int crst_table_upgrade(struct mm_struct *mm, unsigned long end) | ||||
| { | ||||
| 	unsigned long *table, *pgd; | ||||
| 	int rc, notify; | ||||
| 
 | ||||
| 	/* upgrade should only happen from 3 to 4 levels */ | ||||
| 	BUG_ON(mm->context.asce_limit != (1UL << 42)); | ||||
| 
 | ||||
| 	table = crst_table_alloc(mm); | ||||
| 	if (!table) | ||||
| 	/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ | ||||
| 	BUG_ON(mm->context.asce_limit < (1UL << 42)); | ||||
| 	if (end >= TASK_SIZE_MAX) | ||||
| 		return -ENOMEM; | ||||
| 
 | ||||
| 	spin_lock_bh(&mm->page_table_lock); | ||||
| 	pgd = (unsigned long *) mm->pgd; | ||||
| 	crst_table_init(table, _REGION2_ENTRY_EMPTY); | ||||
| 	pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); | ||||
| 	mm->pgd = (pgd_t *) table; | ||||
| 	mm->context.asce_limit = 1UL << 53; | ||||
| 	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | | ||||
| 			   _ASCE_USER_BITS | _ASCE_TYPE_REGION2; | ||||
| 	spin_unlock_bh(&mm->page_table_lock); | ||||
| 
 | ||||
| 	on_each_cpu(__crst_table_upgrade, mm, 0); | ||||
| 	return 0; | ||||
| 	rc = 0; | ||||
| 	notify = 0; | ||||
| 	while (mm->context.asce_limit < end) { | ||||
| 		table = crst_table_alloc(mm); | ||||
| 		if (!table) { | ||||
| 			rc = -ENOMEM; | ||||
| 			break; | ||||
| 		} | ||||
| 		spin_lock_bh(&mm->page_table_lock); | ||||
| 		pgd = (unsigned long *) mm->pgd; | ||||
| 		if (mm->context.asce_limit == (1UL << 42)) { | ||||
| 			crst_table_init(table, _REGION2_ENTRY_EMPTY); | ||||
| 			p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd); | ||||
| 			mm->pgd = (pgd_t *) table; | ||||
| 			mm->context.asce_limit = 1UL << 53; | ||||
| 			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | | ||||
| 				_ASCE_USER_BITS | _ASCE_TYPE_REGION2; | ||||
| 		} else { | ||||
| 			crst_table_init(table, _REGION1_ENTRY_EMPTY); | ||||
| 			pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd); | ||||
| 			mm->pgd = (pgd_t *) table; | ||||
| 			mm->context.asce_limit = -PAGE_SIZE; | ||||
| 			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | | ||||
| 				_ASCE_USER_BITS | _ASCE_TYPE_REGION1; | ||||
| 		} | ||||
| 		notify = 1; | ||||
| 		spin_unlock_bh(&mm->page_table_lock); | ||||
| 	} | ||||
| 	if (notify) | ||||
| 		on_each_cpu(__crst_table_upgrade, mm, 0); | ||||
| 	return rc; | ||||
| } | ||||
| 
 | ||||
| void crst_table_downgrade(struct mm_struct *mm) | ||||
|  | @ -274,7 +291,7 @@ static void __tlb_remove_table(void *_table) | |||
| 	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | ||||
| 
 | ||||
| 	switch (mask) { | ||||
| 	case 0:		/* pmd or pud */ | ||||
| 	case 0:		/* pmd, pud, or p4d */ | ||||
| 		free_pages((unsigned long) table, 2); | ||||
| 		break; | ||||
| 	case 1:		/* lower 2K of a 4K page table */ | ||||
|  |  | |||
|  | @ -610,6 +610,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) | |||
| { | ||||
| 	spinlock_t *ptl; | ||||
| 	pgd_t *pgd; | ||||
| 	p4d_t *p4d; | ||||
| 	pud_t *pud; | ||||
| 	pmd_t *pmd; | ||||
| 	pgste_t pgste; | ||||
|  | @ -618,7 +619,10 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) | |||
| 	bool dirty; | ||||
| 
 | ||||
| 	pgd = pgd_offset(mm, addr); | ||||
| 	pud = pud_alloc(mm, pgd, addr); | ||||
| 	p4d = p4d_alloc(mm, pgd, addr); | ||||
| 	if (!p4d) | ||||
| 		return false; | ||||
| 	pud = pud_alloc(mm, p4d, addr); | ||||
| 	if (!pud) | ||||
| 		return false; | ||||
| 	pmd = pmd_alloc(mm, pud, addr); | ||||
|  |  | |||
|  | @ -38,6 +38,17 @@ static void __ref *vmem_alloc_pages(unsigned int order) | |||
| 	return (void *) memblock_alloc(size, size); | ||||
| } | ||||
| 
 | ||||
| static inline p4d_t *vmem_p4d_alloc(void) | ||||
| { | ||||
| 	p4d_t *p4d = NULL; | ||||
| 
 | ||||
| 	p4d = vmem_alloc_pages(2); | ||||
| 	if (!p4d) | ||||
| 		return NULL; | ||||
| 	clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4); | ||||
| 	return p4d; | ||||
| } | ||||
| 
 | ||||
| static inline pud_t *vmem_pud_alloc(void) | ||||
| { | ||||
| 	pud_t *pud = NULL; | ||||
|  | @ -85,6 +96,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size) | |||
| 	unsigned long end = start + size; | ||||
| 	unsigned long address = start; | ||||
| 	pgd_t *pg_dir; | ||||
| 	p4d_t *p4_dir; | ||||
| 	pud_t *pu_dir; | ||||
| 	pmd_t *pm_dir; | ||||
| 	pte_t *pt_dir; | ||||
|  | @ -102,12 +114,19 @@ static int vmem_add_mem(unsigned long start, unsigned long size) | |||
| 	while (address < end) { | ||||
| 		pg_dir = pgd_offset_k(address); | ||||
| 		if (pgd_none(*pg_dir)) { | ||||
| 			p4_dir = vmem_p4d_alloc(); | ||||
| 			if (!p4_dir) | ||||
| 				goto out; | ||||
| 			pgd_populate(&init_mm, pg_dir, p4_dir); | ||||
| 		} | ||||
| 		p4_dir = p4d_offset(pg_dir, address); | ||||
| 		if (p4d_none(*p4_dir)) { | ||||
| 			pu_dir = vmem_pud_alloc(); | ||||
| 			if (!pu_dir) | ||||
| 				goto out; | ||||
| 			pgd_populate(&init_mm, pg_dir, pu_dir); | ||||
| 			p4d_populate(&init_mm, p4_dir, pu_dir); | ||||
| 		} | ||||
| 		pu_dir = pud_offset(pg_dir, address); | ||||
| 		pu_dir = pud_offset(p4_dir, address); | ||||
| 		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && | ||||
| 		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) && | ||||
| 		     !debug_pagealloc_enabled()) { | ||||
|  | @ -161,6 +180,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) | |||
| 	unsigned long end = start + size; | ||||
| 	unsigned long address = start; | ||||
| 	pgd_t *pg_dir; | ||||
| 	p4d_t *p4_dir; | ||||
| 	pud_t *pu_dir; | ||||
| 	pmd_t *pm_dir; | ||||
| 	pte_t *pt_dir; | ||||
|  | @ -172,7 +192,12 @@ static void vmem_remove_range(unsigned long start, unsigned long size) | |||
| 			address += PGDIR_SIZE; | ||||
| 			continue; | ||||
| 		} | ||||
| 		pu_dir = pud_offset(pg_dir, address); | ||||
| 		p4_dir = p4d_offset(pg_dir, address); | ||||
| 		if (p4d_none(*p4_dir)) { | ||||
| 			address += P4D_SIZE; | ||||
| 			continue; | ||||
| 		} | ||||
| 		pu_dir = pud_offset(p4_dir, address); | ||||
| 		if (pud_none(*pu_dir)) { | ||||
| 			address += PUD_SIZE; | ||||
| 			continue; | ||||
|  | @ -213,6 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) | |||
| 	unsigned long pgt_prot, sgt_prot; | ||||
| 	unsigned long address = start; | ||||
| 	pgd_t *pg_dir; | ||||
| 	p4d_t *p4_dir; | ||||
| 	pud_t *pu_dir; | ||||
| 	pmd_t *pm_dir; | ||||
| 	pte_t *pt_dir; | ||||
|  | @ -227,13 +253,21 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) | |||
| 	for (address = start; address < end;) { | ||||
| 		pg_dir = pgd_offset_k(address); | ||||
| 		if (pgd_none(*pg_dir)) { | ||||
| 			p4_dir = vmem_p4d_alloc(); | ||||
| 			if (!p4_dir) | ||||
| 				goto out; | ||||
| 			pgd_populate(&init_mm, pg_dir, p4_dir); | ||||
| 		} | ||||
| 
 | ||||
| 		p4_dir = p4d_offset(pg_dir, address); | ||||
| 		if (p4d_none(*p4_dir)) { | ||||
| 			pu_dir = vmem_pud_alloc(); | ||||
| 			if (!pu_dir) | ||||
| 				goto out; | ||||
| 			pgd_populate(&init_mm, pg_dir, pu_dir); | ||||
| 			p4d_populate(&init_mm, p4_dir, pu_dir); | ||||
| 		} | ||||
| 
 | ||||
| 		pu_dir = pud_offset(pg_dir, address); | ||||
| 		pu_dir = pud_offset(p4_dir, address); | ||||
| 		if (pud_none(*pu_dir)) { | ||||
| 			pm_dir = vmem_pmd_alloc(); | ||||
| 			if (!pm_dir) | ||||
|  |  | |||