	mm/ksm: refactor out try_to_merge_with_zero_page()
Patch series "mm/ksm: cmp_and_merge_page() optimizations and cleanup", v2.

This series mainly optimizes cmp_and_merge_page() to have a more efficient, separate code flow for ksm pages and non-ksm anon pages:

- ksm page: obviously no need to calculate the checksum.
- anon page: no need to search the stable tree if the page is changing fast; try to merge with the zero page before searching the stable tree for a ksm page.

Please see patch 2 for details.

Patch 3 is a cleanup and also a small optimization of the chain()/chain_prune() interfaces, which made stable_tree_search()/stable_tree_insert() overly complex.

I have done simple testing using "hackbench -g 1 -l 300000" (maybe I need to use a better workload) on my machine, and have seen a small decrease in ksmd CPU usage and some improvement in cmp_and_merge_page() latency: the latency of cmp_and_merge_page() when handling non-ksm anon pages has been improved.

This patch (of 3):

In preparation for later changes, refactor out a new function, try_to_merge_with_zero_page(), which tries to merge a page with the zero page.

Link: https://lkml.kernel.org/r/20240621-b4-ksm-scan-optimize-v2-0-1c328aa9e30b@linux.dev
Link: https://lkml.kernel.org/r/20240621-b4-ksm-scan-optimize-v2-1-1c328aa9e30b@linux.dev
Signed-off-by: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Stefan Roesch <shr@devkernel.io>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
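For readers who want to see the zero-page path this helper serves in action, below is a minimal userspace sketch. It is not part of the patch; the page count, sleep time, and the /proc/<pid>/ksm_stat hint are assumptions about a reasonably recent kernel built with CONFIG_KSM. With /sys/kernel/mm/ksm/run and /sys/kernel/mm/ksm/use_zero_pages both set to 1, ksmd should map zero-filled MADV_MERGEABLE pages to the shared zero page, which is exactly the case try_to_merge_with_zero_page() handles.

/*
 * Demo (not from the patch): register zero-filled pages with KSM and
 * let ksmd merge them with the zero page. Assumes, before running:
 *   echo 1 > /sys/kernel/mm/ksm/run
 *   echo 1 > /sys/kernel/mm/ksm/use_zero_pages
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	size_t len = 64 * (size_t)page;	/* 64 zero-filled pages */
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(buf, 0, len);	/* write-fault the pages in; contents stay zero */
	if (madvise(buf, len, MADV_MERGEABLE)) {	/* hand the range to ksmd */
		perror("madvise(MADV_MERGEABLE)");
		return 1;
	}
	sleep(10);	/* arbitrary: give ksmd a few scan passes */
	/*
	 * On kernels that expose it, /proc/<pid>/ksm_stat should now show
	 * a nonzero ksm_zero_pages count for this process.
	 */
	printf("pid %d: check /proc/%d/ksm_stat\n", getpid(), getpid());
	pause();	/* keep the mapping alive for inspection */
	return 0;
}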
parent 003af997c8
commit ac90c56bbd

2 changed files with 40 additions and 31 deletions
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2666,7 +2666,6 @@ static int gather_surplus_pages(struct hstate *h, long delta)
 retry:
 	spin_unlock_irq(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
-		folio = NULL;
 		for_each_node_mask(node, cpuset_current_mems_allowed) {
 			if (!mbind_nodemask || node_isset(node, *mbind_nodemask)) {
 				folio = alloc_surplus_hugetlb_folio(h, htlb_alloc_mask(h),

mm/ksm.c | 70

--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1527,6 +1527,44 @@ static int try_to_merge_one_page(struct vm_area_struct *vma,
 	return err;
 }
 
+/*
+ * This function returns 0 if the pages were merged or if they are
+ * no longer merging candidates (e.g., VMA stale), -EFAULT otherwise.
+ */
+static int try_to_merge_with_zero_page(struct ksm_rmap_item *rmap_item,
+				       struct page *page)
+{
+	struct mm_struct *mm = rmap_item->mm;
+	int err = -EFAULT;
+
+	/*
+	 * Same checksum as an empty page. We attempt to merge it with the
+	 * appropriate zero page if the user enabled this via sysfs.
+	 */
+	if (ksm_use_zero_pages && (rmap_item->oldchecksum == zero_checksum)) {
+		struct vm_area_struct *vma;
+
+		mmap_read_lock(mm);
+		vma = find_mergeable_vma(mm, rmap_item->address);
+		if (vma) {
+			err = try_to_merge_one_page(vma, page,
+					ZERO_PAGE(rmap_item->address));
+			trace_ksm_merge_one_page(
+				page_to_pfn(ZERO_PAGE(rmap_item->address)),
+				rmap_item, mm, err);
+		} else {
+			/*
+			 * If the vma is out of date, we do not need to
+			 * continue.
+			 */
+			err = 0;
+		}
+		mmap_read_unlock(mm);
+	}
+
+	return err;
+}
+
 /*
  * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
  * but no new kernel page is allocated: kpage must already be a ksm page.
@@ -2302,7 +2340,6 @@ static void stable_tree_append(struct ksm_rmap_item *rmap_item,
  */
 static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_item)
 {
-	struct mm_struct *mm = rmap_item->mm;
 	struct ksm_rmap_item *tree_rmap_item;
 	struct page *tree_page = NULL;
 	struct ksm_stable_node *stable_node;
@@ -2371,36 +2408,9 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_item)
 		return;
 	}
 
-	/*
-	 * Same checksum as an empty page. We attempt to merge it with the
-	 * appropriate zero page if the user enabled this via sysfs.
-	 */
-	if (ksm_use_zero_pages && (checksum == zero_checksum)) {
-		struct vm_area_struct *vma;
-
-		mmap_read_lock(mm);
-		vma = find_mergeable_vma(mm, rmap_item->address);
-		if (vma) {
-			err = try_to_merge_one_page(vma, page,
-					ZERO_PAGE(rmap_item->address));
-			trace_ksm_merge_one_page(
-				page_to_pfn(ZERO_PAGE(rmap_item->address)),
-				rmap_item, mm, err);
-		} else {
-			/*
-			 * If the vma is out of date, we do not need to
-			 * continue.
-			 */
-			err = 0;
-		}
-		mmap_read_unlock(mm);
-		/*
-		 * In case of failure, the page was not really empty, so we
-		 * need to continue. Otherwise we're done.
-		 */
-		if (!err)
-			return;
-	}
+	if (!try_to_merge_with_zero_page(rmap_item, page))
+		return;
+
 	tree_rmap_item =
 		unstable_tree_search_insert(rmap_item, page, &tree_page);
 	if (tree_rmap_item) {