riscv: mm: Add memory hotplugging support

For an architecture to support memory hotplugging, a couple of
callbacks need to be implemented:

arch_add_memory()
This callback is responsible for adding the physical memory into the
direct map, and calls into the generic memory hotplugging code via
__add_pages(), which adds the corresponding struct page entries and
updates the vmemmap mapping.

arch_remove_memory()
This is the inverse of the callback above.

vmemmap_free()
This function tears down the vmemmap mappings (if
CONFIG_SPARSEMEM_VMEMMAP is enabled), and also deallocates the backing
vmemmap pages. Note that for persistent memory, an alternative
allocator for the backing pages can be used: the vmem_altmap. This
means that when the backing pages are cleared, extra care is needed so
that the correct deallocation method is used.

arch_get_mappable_range()
This function returns the PA range that the direct map can map. It is
used by the MHP internals for sanity checks.

The page table unmap/teardown functions are heavily based on code from
the x86 tree. The same remove_pgd_mapping() function is used in both
vmemmap_free() and arch_remove_memory(), but in the latter the backing
pages are not removed.

Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/r/20240605114100.315918-7-bjorn@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
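For orientation, the generic memory hotplug (MHP) core drives these
callbacks roughly as in the sketch below. This is a simplified
paraphrase of the mm/memory_hotplug.c hot-add path, not the literal
upstream code; the function name and error handling are illustrative
only.

/*
 * Illustrative sketch: how the generic hotplug core exercises the
 * arch callbacks added by this commit (paraphrased, not upstream code).
 */
static int sketch_hot_add(int nid, u64 start, u64 size)
{
	struct mhp_params params = { .pgprot = PAGE_KERNEL };
	struct range allowed = arch_get_mappable_range();
	int ret;

	/* Sanity-check the request against what the direct map can cover. */
	if (start < allowed.start || start + size - 1 > allowed.end)
		return -ERANGE;

	/*
	 * The arch maps the range into the direct map and calls
	 * __add_pages() to create the struct page / vmemmap entries.
	 */
	ret = arch_add_memory(nid, start, size, &params);
	if (ret)
		return ret;

	/* Memory blocks are then created and onlined via sysfs/udev. */
	return 0;
}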
commit c75a74f4ba
parent 6e6c5e21b8
1 file changed, 267 insertions(+), 0 deletions(-)
@@ -1533,3 +1533,270 @@ struct execmem_info __init *execmem_arch_setup(void)
}
#endif /* CONFIG_MMU */
#endif /* CONFIG_EXECMEM */

#ifdef CONFIG_MEMORY_HOTPLUG
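/*
 * The free_{pte,pmd,pud}_table() helpers below free a page-table page
 * once every entry in it is none. Pages that were reserved at boot
 * (e.g. early page tables) are released with free_reserved_page();
 * dynamically allocated ones go back via pagetable_free()/free_pages().
 */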
static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
{
	struct page *page = pmd_page(*pmd);
	struct ptdesc *ptdesc = page_ptdesc(page);
	pte_t *pte;
	int i;

	for (i = 0; i < PTRS_PER_PTE; i++) {
		pte = pte_start + i;
		if (!pte_none(*pte))
			return;
	}

	pagetable_pte_dtor(ptdesc);
	if (PageReserved(page))
		free_reserved_page(page);
	else
		pagetable_free(ptdesc);
	pmd_clear(pmd);
}

static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
	struct page *page = pud_page(*pud);
	struct ptdesc *ptdesc = page_ptdesc(page);
	pmd_t *pmd;
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd = pmd_start + i;
		if (!pmd_none(*pmd))
			return;
	}

	pagetable_pmd_dtor(ptdesc);
	if (PageReserved(page))
		free_reserved_page(page);
	else
		pagetable_free(ptdesc);
	pud_clear(pud);
}

static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
	struct page *page = p4d_page(*p4d);
	pud_t *pud;
	int i;

	for (i = 0; i < PTRS_PER_PUD; i++) {
		pud = pud_start + i;
		if (!pud_none(*pud))
			return;
	}

	if (PageReserved(page))
		free_reserved_page(page);
	else
		free_pages((unsigned long)page_address(page), 0);
	p4d_clear(p4d);
}

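/*
 * vmemmap backing pages can come from three sources: a vmem_altmap
 * (e.g. persistent memory providing its own backing storage), memblock
 * reservations made at boot, or the buddy allocator. Each source needs
 * a matching release path.
 */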
static void __meminit free_vmemmap_storage(struct page *page, size_t size,
					   struct vmem_altmap *altmap)
{
	int order = get_order(size);

	if (altmap) {
		vmem_altmap_free(altmap, size >> PAGE_SHIFT);
		return;
	}

	if (PageReserved(page)) {
		unsigned int nr_pages = 1 << order;

		while (nr_pages--)
			free_reserved_page(page++);
		return;
	}

	free_pages((unsigned long)page_address(page), order);
}

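/*
 * remove_{pte,pmd,pud,p4d}_mapping() each walk and clear one level of
 * the kernel page table. Leaf entries are cleared directly; non-leaf
 * entries recurse into the next-lower table, which is freed if it has
 * become empty. For vmemmap teardown (is_vmemmap), the backing pages
 * are released as well.
 */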
static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	pte_t *ptep, pte;

	for (; addr < end; addr = next) {
		next = (addr + PAGE_SIZE) & PAGE_MASK;
		if (next > end)
			next = end;

		ptep = pte_base + pte_index(addr);
		pte = ptep_get(ptep);
		if (!pte_present(pte))
			continue;

		pte_clear(&init_mm, addr, ptep);
		if (is_vmemmap)
			free_vmemmap_storage(pte_page(pte), PAGE_SIZE, altmap);
	}
}

static void __meminit remove_pmd_mapping(pmd_t *pmd_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	pte_t *pte_base;
	pmd_t *pmdp, pmd;

	for (; addr < end; addr = next) {
		next = pmd_addr_end(addr, end);
		pmdp = pmd_base + pmd_index(addr);
		pmd = pmdp_get(pmdp);
		if (!pmd_present(pmd))
			continue;

		if (pmd_leaf(pmd)) {
			pmd_clear(pmdp);
			if (is_vmemmap)
				free_vmemmap_storage(pmd_page(pmd), PMD_SIZE, altmap);
			continue;
		}

		pte_base = (pte_t *)pmd_page_vaddr(*pmdp);
		remove_pte_mapping(pte_base, addr, next, is_vmemmap, altmap);
		free_pte_table(pte_base, pmdp);
	}
}

static void __meminit remove_pud_mapping(pud_t *pud_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	pud_t *pudp, pud;
	pmd_t *pmd_base;

	for (; addr < end; addr = next) {
		next = pud_addr_end(addr, end);
		pudp = pud_base + pud_index(addr);
		pud = pudp_get(pudp);
		if (!pud_present(pud))
			continue;

		if (pud_leaf(pud)) {
			if (pgtable_l4_enabled) {
				pud_clear(pudp);
				if (is_vmemmap)
					free_vmemmap_storage(pud_page(pud), PUD_SIZE, altmap);
			}
			continue;
		}

		pmd_base = pmd_offset(pudp, 0);
		remove_pmd_mapping(pmd_base, addr, next, is_vmemmap, altmap);

		if (pgtable_l4_enabled)
			free_pmd_table(pmd_base, pudp);
	}
}

static void __meminit remove_p4d_mapping(p4d_t *p4d_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	p4d_t *p4dp, p4d;
	pud_t *pud_base;

	for (; addr < end; addr = next) {
		next = p4d_addr_end(addr, end);
		p4dp = p4d_base + p4d_index(addr);
		p4d = p4dp_get(p4dp);
		if (!p4d_present(p4d))
			continue;

		if (p4d_leaf(p4d)) {
			if (pgtable_l5_enabled) {
				p4d_clear(p4dp);
				if (is_vmemmap)
					free_vmemmap_storage(p4d_page(p4d), P4D_SIZE, altmap);
			}
			continue;
		}

		pud_base = pud_offset(p4dp, 0);
		remove_pud_mapping(pud_base, addr, next, is_vmemmap, altmap);

		if (pgtable_l5_enabled)
			free_pud_table(pud_base, p4dp);
	}
}

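/*
 * Common teardown entry point: used by vmemmap_free() (with
 * is_vmemmap=true, which also frees the backing pages) and by
 * arch_remove_memory() via remove_linear_mapping() (mappings only).
 */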
static void __meminit remove_pgd_mapping(unsigned long va, unsigned long end, bool is_vmemmap,
					 struct vmem_altmap *altmap)
{
	unsigned long addr, next;
	p4d_t *p4d_base;
	pgd_t *pgd;

	for (addr = va; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);
		pgd = pgd_offset_k(addr);

		if (!pgd_present(*pgd))
			continue;

		if (pgd_leaf(*pgd))
			continue;

		p4d_base = p4d_offset(pgd, 0);
		remove_p4d_mapping(p4d_base, addr, next, is_vmemmap, altmap);
	}

	flush_tlb_all();
}

static void __meminit remove_linear_mapping(phys_addr_t start, u64 size)
{
	unsigned long va = (unsigned long)__va(start);
	unsigned long end = (unsigned long)__va(start + size);

	remove_pgd_mapping(va, end, false, NULL);
}

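/* The MHP core checks hot(un)plug requests against this PA range. */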
struct range arch_get_mappable_range(void)
{
	struct range mhp_range;

	mhp_range.start = __pa(PAGE_OFFSET);
	mhp_range.end = __pa(PAGE_END - 1);
	return mhp_range;
}

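/*
 * Hot-add: map the range into the direct map, then let the generic
 * code create the struct pages. On failure, the linear mapping is
 * torn down again.
 */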
int __ref arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params)
{
	int ret = 0;

	create_linear_mapping_range(start, start + size, 0, &params->pgprot);
	ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, params);
	if (ret) {
		remove_linear_mapping(start, size);
		goto out;
	}

	max_pfn = PFN_UP(start + size);
	max_low_pfn = max_pfn;

out:
	flush_tlb_all();
	return ret;
}

void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
	__remove_pages(start >> PAGE_SHIFT, size >> PAGE_SHIFT, altmap);
	remove_linear_mapping(start, size);
	flush_tlb_all();
}

void __ref vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap)
{
	remove_pgd_mapping(start, end, true, altmap);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
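Note: this patch only adds the arch callbacks; hotplug becomes
available once the architecture also selects the corresponding Kconfig
options (ARCH_ENABLE_MEMORY_HOTPLUG, and ARCH_ENABLE_MEMORY_HOTREMOVE
for removal), which is wired up in a separate patch of this series.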