forked from mirrors/linux
		
	khugepaged: introduce 'max_ptes_shared' tunable
'max_ptes_shared' specifies how many pages can be shared across multiple processes. Exceeding the number would block the collapse:: /sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_shared A higher value may increase memory footprint for some workloads. By default, at least half of the pages must not be shared. [colin.king@canonical.com: fix several spelling mistakes] Link: http://lkml.kernel.org/r/20200420084241.65433-1-colin.king@canonical.com Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Colin Ian King <colin.king@canonical.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Tested-by: Zi Yan <ziy@nvidia.com> Reviewed-by: William Kucharski <william.kucharski@oracle.com> Reviewed-by: Zi Yan <ziy@nvidia.com> Acked-by: Yang Shi <yang.shi@linux.alibaba.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Ralph Campbell <rcampbell@nvidia.com> Link: http://lkml.kernel.org/r/20200416160026.16538-9-kirill.shutemov@linux.intel.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									3917c80280
								
							
						
					
					
						commit
						71a2c112a0
					
				
					 4 changed files with 140 additions and 5 deletions
				
			
		|  | @ -220,6 +220,13 @@ memory. A lower value can prevent THPs from being | ||||||
| collapsed, resulting in fewer pages being collapsed into | collapsed, resulting in fewer pages being collapsed into | ||||||
| THPs, and lower memory access performance. | THPs, and lower memory access performance. | ||||||
| 
 | 
 | ||||||
|  | ``max_ptes_shared`` specifies how many pages can be shared across multiple | ||||||
|  | processes. Exceeding the number would block the collapse:: | ||||||
|  | 
 | ||||||
|  | 	/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_shared | ||||||
|  | 
 | ||||||
|  | A higher value may increase memory footprint for some workloads. | ||||||
|  | 
 | ||||||
| Boot parameter | Boot parameter | ||||||
| ============== | ============== | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -12,6 +12,8 @@ | ||||||
| 	EM( SCAN_SUCCEED,		"succeeded")			\ | 	EM( SCAN_SUCCEED,		"succeeded")			\ | ||||||
| 	EM( SCAN_PMD_NULL,		"pmd_null")			\ | 	EM( SCAN_PMD_NULL,		"pmd_null")			\ | ||||||
| 	EM( SCAN_EXCEED_NONE_PTE,	"exceed_none_pte")		\ | 	EM( SCAN_EXCEED_NONE_PTE,	"exceed_none_pte")		\ | ||||||
|  | 	EM( SCAN_EXCEED_SWAP_PTE,	"exceed_swap_pte")		\ | ||||||
|  | 	EM( SCAN_EXCEED_SHARED_PTE,	"exceed_shared_pte")		\ | ||||||
| 	EM( SCAN_PTE_NON_PRESENT,	"pte_non_present")		\ | 	EM( SCAN_PTE_NON_PRESENT,	"pte_non_present")		\ | ||||||
| 	EM( SCAN_PTE_UFFD_WP,		"pte_uffd_wp")			\ | 	EM( SCAN_PTE_UFFD_WP,		"pte_uffd_wp")			\ | ||||||
| 	EM( SCAN_PAGE_RO,		"no_writable_page")		\ | 	EM( SCAN_PAGE_RO,		"no_writable_page")		\ | ||||||
|  | @ -31,7 +33,6 @@ | ||||||
| 	EM( SCAN_DEL_PAGE_LRU,		"could_not_delete_page_from_lru")\ | 	EM( SCAN_DEL_PAGE_LRU,		"could_not_delete_page_from_lru")\ | ||||||
| 	EM( SCAN_ALLOC_HUGE_PAGE_FAIL,	"alloc_huge_page_failed")	\ | 	EM( SCAN_ALLOC_HUGE_PAGE_FAIL,	"alloc_huge_page_failed")	\ | ||||||
| 	EM( SCAN_CGROUP_CHARGE_FAIL,	"ccgroup_charge_failed")	\ | 	EM( SCAN_CGROUP_CHARGE_FAIL,	"ccgroup_charge_failed")	\ | ||||||
| 	EM( SCAN_EXCEED_SWAP_PTE,	"exceed_swap_pte")		\ |  | ||||||
| 	EM( SCAN_TRUNCATED,		"truncated")			\ | 	EM( SCAN_TRUNCATED,		"truncated")			\ | ||||||
| 	EMe(SCAN_PAGE_HAS_PRIVATE,	"page_has_private")		\ | 	EMe(SCAN_PAGE_HAS_PRIVATE,	"page_has_private")		\ | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -28,6 +28,8 @@ enum scan_result { | ||||||
| 	SCAN_SUCCEED, | 	SCAN_SUCCEED, | ||||||
| 	SCAN_PMD_NULL, | 	SCAN_PMD_NULL, | ||||||
| 	SCAN_EXCEED_NONE_PTE, | 	SCAN_EXCEED_NONE_PTE, | ||||||
|  | 	SCAN_EXCEED_SWAP_PTE, | ||||||
|  | 	SCAN_EXCEED_SHARED_PTE, | ||||||
| 	SCAN_PTE_NON_PRESENT, | 	SCAN_PTE_NON_PRESENT, | ||||||
| 	SCAN_PTE_UFFD_WP, | 	SCAN_PTE_UFFD_WP, | ||||||
| 	SCAN_PAGE_RO, | 	SCAN_PAGE_RO, | ||||||
|  | @ -47,7 +49,6 @@ enum scan_result { | ||||||
| 	SCAN_DEL_PAGE_LRU, | 	SCAN_DEL_PAGE_LRU, | ||||||
| 	SCAN_ALLOC_HUGE_PAGE_FAIL, | 	SCAN_ALLOC_HUGE_PAGE_FAIL, | ||||||
| 	SCAN_CGROUP_CHARGE_FAIL, | 	SCAN_CGROUP_CHARGE_FAIL, | ||||||
| 	SCAN_EXCEED_SWAP_PTE, |  | ||||||
| 	SCAN_TRUNCATED, | 	SCAN_TRUNCATED, | ||||||
| 	SCAN_PAGE_HAS_PRIVATE, | 	SCAN_PAGE_HAS_PRIVATE, | ||||||
| }; | }; | ||||||
|  | @ -72,6 +73,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait); | ||||||
|  */ |  */ | ||||||
| static unsigned int khugepaged_max_ptes_none __read_mostly; | static unsigned int khugepaged_max_ptes_none __read_mostly; | ||||||
| static unsigned int khugepaged_max_ptes_swap __read_mostly; | static unsigned int khugepaged_max_ptes_swap __read_mostly; | ||||||
|  | static unsigned int khugepaged_max_ptes_shared __read_mostly; | ||||||
| 
 | 
 | ||||||
| #define MM_SLOTS_HASH_BITS 10 | #define MM_SLOTS_HASH_BITS 10 | ||||||
| static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS); | static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS); | ||||||
|  | @ -291,15 +293,43 @@ static struct kobj_attribute khugepaged_max_ptes_swap_attr = | ||||||
| 	__ATTR(max_ptes_swap, 0644, khugepaged_max_ptes_swap_show, | 	__ATTR(max_ptes_swap, 0644, khugepaged_max_ptes_swap_show, | ||||||
| 	       khugepaged_max_ptes_swap_store); | 	       khugepaged_max_ptes_swap_store); | ||||||
| 
 | 
 | ||||||
|  | static ssize_t khugepaged_max_ptes_shared_show(struct kobject *kobj, | ||||||
|  | 					     struct kobj_attribute *attr, | ||||||
|  | 					     char *buf) | ||||||
|  | { | ||||||
|  | 	return sprintf(buf, "%u\n", khugepaged_max_ptes_shared); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static ssize_t khugepaged_max_ptes_shared_store(struct kobject *kobj, | ||||||
|  | 					      struct kobj_attribute *attr, | ||||||
|  | 					      const char *buf, size_t count) | ||||||
|  | { | ||||||
|  | 	int err; | ||||||
|  | 	unsigned long max_ptes_shared; | ||||||
|  | 
 | ||||||
|  | 	err  = kstrtoul(buf, 10, &max_ptes_shared); | ||||||
|  | 	if (err || max_ptes_shared > HPAGE_PMD_NR-1) | ||||||
|  | 		return -EINVAL; | ||||||
|  | 
 | ||||||
|  | 	khugepaged_max_ptes_shared = max_ptes_shared; | ||||||
|  | 
 | ||||||
|  | 	return count; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static struct kobj_attribute khugepaged_max_ptes_shared_attr = | ||||||
|  | 	__ATTR(max_ptes_shared, 0644, khugepaged_max_ptes_shared_show, | ||||||
|  | 	       khugepaged_max_ptes_shared_store); | ||||||
|  | 
 | ||||||
| static struct attribute *khugepaged_attr[] = { | static struct attribute *khugepaged_attr[] = { | ||||||
| 	&khugepaged_defrag_attr.attr, | 	&khugepaged_defrag_attr.attr, | ||||||
| 	&khugepaged_max_ptes_none_attr.attr, | 	&khugepaged_max_ptes_none_attr.attr, | ||||||
|  | 	&khugepaged_max_ptes_swap_attr.attr, | ||||||
|  | 	&khugepaged_max_ptes_shared_attr.attr, | ||||||
| 	&pages_to_scan_attr.attr, | 	&pages_to_scan_attr.attr, | ||||||
| 	&pages_collapsed_attr.attr, | 	&pages_collapsed_attr.attr, | ||||||
| 	&full_scans_attr.attr, | 	&full_scans_attr.attr, | ||||||
| 	&scan_sleep_millisecs_attr.attr, | 	&scan_sleep_millisecs_attr.attr, | ||||||
| 	&alloc_sleep_millisecs_attr.attr, | 	&alloc_sleep_millisecs_attr.attr, | ||||||
| 	&khugepaged_max_ptes_swap_attr.attr, |  | ||||||
| 	NULL, | 	NULL, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | @ -359,6 +389,7 @@ int __init khugepaged_init(void) | ||||||
| 	khugepaged_pages_to_scan = HPAGE_PMD_NR * 8; | 	khugepaged_pages_to_scan = HPAGE_PMD_NR * 8; | ||||||
| 	khugepaged_max_ptes_none = HPAGE_PMD_NR - 1; | 	khugepaged_max_ptes_none = HPAGE_PMD_NR - 1; | ||||||
| 	khugepaged_max_ptes_swap = HPAGE_PMD_NR / 8; | 	khugepaged_max_ptes_swap = HPAGE_PMD_NR / 8; | ||||||
|  | 	khugepaged_max_ptes_shared = HPAGE_PMD_NR / 2; | ||||||
| 
 | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
|  | @ -557,7 +588,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, | ||||||
| { | { | ||||||
| 	struct page *page = NULL; | 	struct page *page = NULL; | ||||||
| 	pte_t *_pte; | 	pte_t *_pte; | ||||||
| 	int none_or_zero = 0, result = 0, referenced = 0; | 	int none_or_zero = 0, shared = 0, result = 0, referenced = 0; | ||||||
| 	bool writable = false; | 	bool writable = false; | ||||||
| 
 | 
 | ||||||
| 	for (_pte = pte; _pte < pte+HPAGE_PMD_NR; | 	for (_pte = pte; _pte < pte+HPAGE_PMD_NR; | ||||||
|  | @ -585,6 +616,12 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, | ||||||
| 
 | 
 | ||||||
| 		VM_BUG_ON_PAGE(!PageAnon(page), page); | 		VM_BUG_ON_PAGE(!PageAnon(page), page); | ||||||
| 
 | 
 | ||||||
|  | 		if (page_mapcount(page) > 1 && | ||||||
|  | 				++shared > khugepaged_max_ptes_shared) { | ||||||
|  | 			result = SCAN_EXCEED_SHARED_PTE; | ||||||
|  | 			goto out; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
| 		if (PageCompound(page)) { | 		if (PageCompound(page)) { | ||||||
| 			struct page *p; | 			struct page *p; | ||||||
| 			page = compound_head(page); | 			page = compound_head(page); | ||||||
|  | @ -1168,7 +1205,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, | ||||||
| { | { | ||||||
| 	pmd_t *pmd; | 	pmd_t *pmd; | ||||||
| 	pte_t *pte, *_pte; | 	pte_t *pte, *_pte; | ||||||
| 	int ret = 0, none_or_zero = 0, result = 0, referenced = 0; | 	int ret = 0, result = 0, referenced = 0; | ||||||
|  | 	int none_or_zero = 0, shared = 0; | ||||||
| 	struct page *page = NULL; | 	struct page *page = NULL; | ||||||
| 	unsigned long _address; | 	unsigned long _address; | ||||||
| 	spinlock_t *ptl; | 	spinlock_t *ptl; | ||||||
|  | @ -1240,6 +1278,12 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, | ||||||
| 			goto out_unmap; | 			goto out_unmap; | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
|  | 		if (page_mapcount(page) > 1 && | ||||||
|  | 				++shared > khugepaged_max_ptes_shared) { | ||||||
|  | 			result = SCAN_EXCEED_SHARED_PTE; | ||||||
|  | 			goto out_unmap; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
| 		page = compound_head(page); | 		page = compound_head(page); | ||||||
| 
 | 
 | ||||||
| 		/*
 | 		/*
 | ||||||
|  |  | ||||||
|  | @ -78,6 +78,7 @@ struct khugepaged_settings { | ||||||
| 	unsigned int scan_sleep_millisecs; | 	unsigned int scan_sleep_millisecs; | ||||||
| 	unsigned int max_ptes_none; | 	unsigned int max_ptes_none; | ||||||
| 	unsigned int max_ptes_swap; | 	unsigned int max_ptes_swap; | ||||||
|  | 	unsigned int max_ptes_shared; | ||||||
| 	unsigned long pages_to_scan; | 	unsigned long pages_to_scan; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | @ -277,6 +278,7 @@ static void write_settings(struct settings *settings) | ||||||
| 			khugepaged->scan_sleep_millisecs); | 			khugepaged->scan_sleep_millisecs); | ||||||
| 	write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none); | 	write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none); | ||||||
| 	write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap); | 	write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap); | ||||||
|  | 	write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared); | ||||||
| 	write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); | 	write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -313,6 +315,7 @@ static void save_settings(void) | ||||||
| 			read_num("khugepaged/scan_sleep_millisecs"), | 			read_num("khugepaged/scan_sleep_millisecs"), | ||||||
| 		.max_ptes_none = read_num("khugepaged/max_ptes_none"), | 		.max_ptes_none = read_num("khugepaged/max_ptes_none"), | ||||||
| 		.max_ptes_swap = read_num("khugepaged/max_ptes_swap"), | 		.max_ptes_swap = read_num("khugepaged/max_ptes_swap"), | ||||||
|  | 		.max_ptes_shared = read_num("khugepaged/max_ptes_shared"), | ||||||
| 		.pages_to_scan = read_num("khugepaged/pages_to_scan"), | 		.pages_to_scan = read_num("khugepaged/pages_to_scan"), | ||||||
| 	}; | 	}; | ||||||
| 	success("OK"); | 	success("OK"); | ||||||
|  | @ -896,12 +899,90 @@ static void collapse_fork_compound(void) | ||||||
| 			fail("Fail"); | 			fail("Fail"); | ||||||
| 		fill_memory(p, 0, page_size); | 		fill_memory(p, 0, page_size); | ||||||
| 
 | 
 | ||||||
|  | 		write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); | ||||||
| 		if (wait_for_scan("Collapse PTE table full of compound pages in child", p)) | 		if (wait_for_scan("Collapse PTE table full of compound pages in child", p)) | ||||||
| 			fail("Timeout"); | 			fail("Timeout"); | ||||||
| 		else if (check_huge(p)) | 		else if (check_huge(p)) | ||||||
| 			success("OK"); | 			success("OK"); | ||||||
| 		else | 		else | ||||||
| 			fail("Fail"); | 			fail("Fail"); | ||||||
|  | 		write_num("khugepaged/max_ptes_shared", | ||||||
|  | 				default_settings.khugepaged.max_ptes_shared); | ||||||
|  | 
 | ||||||
|  | 		validate_memory(p, 0, hpage_pmd_size); | ||||||
|  | 		munmap(p, hpage_pmd_size); | ||||||
|  | 		exit(exit_status); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	wait(&wstatus); | ||||||
|  | 	exit_status += WEXITSTATUS(wstatus); | ||||||
|  | 
 | ||||||
|  | 	printf("Check if parent still has huge page..."); | ||||||
|  | 	if (check_huge(p)) | ||||||
|  | 		success("OK"); | ||||||
|  | 	else | ||||||
|  | 		fail("Fail"); | ||||||
|  | 	validate_memory(p, 0, hpage_pmd_size); | ||||||
|  | 	munmap(p, hpage_pmd_size); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void collapse_max_ptes_shared() | ||||||
|  | { | ||||||
|  | 	int max_ptes_shared = read_num("khugepaged/max_ptes_shared"); | ||||||
|  | 	int wstatus; | ||||||
|  | 	void *p; | ||||||
|  | 
 | ||||||
|  | 	p = alloc_mapping(); | ||||||
|  | 
 | ||||||
|  | 	printf("Allocate huge page..."); | ||||||
|  | 	madvise(p, hpage_pmd_size, MADV_HUGEPAGE); | ||||||
|  | 	fill_memory(p, 0, hpage_pmd_size); | ||||||
|  | 	if (check_huge(p)) | ||||||
|  | 		success("OK"); | ||||||
|  | 	else | ||||||
|  | 		fail("Fail"); | ||||||
|  | 
 | ||||||
|  | 	printf("Share huge page over fork()..."); | ||||||
|  | 	if (!fork()) { | ||||||
|  | 		/* Do not touch settings on child exit */ | ||||||
|  | 		skip_settings_restore = true; | ||||||
|  | 		exit_status = 0; | ||||||
|  | 
 | ||||||
|  | 		if (check_huge(p)) | ||||||
|  | 			success("OK"); | ||||||
|  | 		else | ||||||
|  | 			fail("Fail"); | ||||||
|  | 
 | ||||||
|  | 		printf("Trigger CoW on page %d of %d...", | ||||||
|  | 				hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr); | ||||||
|  | 		fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size); | ||||||
|  | 		if (!check_huge(p)) | ||||||
|  | 			success("OK"); | ||||||
|  | 		else | ||||||
|  | 			fail("Fail"); | ||||||
|  | 
 | ||||||
|  | 		if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p)) | ||||||
|  | 			fail("Timeout"); | ||||||
|  | 		else if (!check_huge(p)) | ||||||
|  | 			success("OK"); | ||||||
|  | 		else | ||||||
|  | 			fail("Fail"); | ||||||
|  | 
 | ||||||
|  | 		printf("Trigger CoW on page %d of %d...", | ||||||
|  | 				hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr); | ||||||
|  | 		fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size); | ||||||
|  | 		if (!check_huge(p)) | ||||||
|  | 			success("OK"); | ||||||
|  | 		else | ||||||
|  | 			fail("Fail"); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 		if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p)) | ||||||
|  | 			fail("Timeout"); | ||||||
|  | 		else if (check_huge(p)) | ||||||
|  | 			success("OK"); | ||||||
|  | 		else | ||||||
|  | 			fail("Fail"); | ||||||
| 
 | 
 | ||||||
| 		validate_memory(p, 0, hpage_pmd_size); | 		validate_memory(p, 0, hpage_pmd_size); | ||||||
| 		munmap(p, hpage_pmd_size); | 		munmap(p, hpage_pmd_size); | ||||||
|  | @ -930,6 +1011,7 @@ int main(void) | ||||||
| 
 | 
 | ||||||
| 	default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1; | 	default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1; | ||||||
| 	default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8; | 	default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8; | ||||||
|  | 	default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2; | ||||||
| 	default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; | 	default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; | ||||||
| 
 | 
 | ||||||
| 	save_settings(); | 	save_settings(); | ||||||
|  | @ -947,6 +1029,7 @@ int main(void) | ||||||
| 	collapse_compound_extreme(); | 	collapse_compound_extreme(); | ||||||
| 	collapse_fork(); | 	collapse_fork(); | ||||||
| 	collapse_fork_compound(); | 	collapse_fork_compound(); | ||||||
|  | 	collapse_max_ptes_shared(); | ||||||
| 
 | 
 | ||||||
| 	restore_settings(0); | 	restore_settings(0); | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Kirill A. Shutemov
						Kirill A. Shutemov