	mm,thp,rmap: subpages_mapcount COMPOUND_MAPPED if PMD-mapped
Can the lock_compound_mapcount() bit_spin_lock apparatus be removed now?
Yes.  Not by atomic64_t or cmpxchg games, those get difficult on 32-bit;
but if we slightly abuse subpages_mapcount by additionally demanding that
one bit be set there when the compound page is PMD-mapped, then a cascade
of two atomic ops is able to maintain the stats without bit_spin_lock.

This is harder to reason about than when bit_spin_locked, but I believe
safe; and no drift in stats detected when testing.  When there are racing
removes and adds, of course the sequence of operations is less well-defined;
but each operation on subpages_mapcount is atomically good.  What might be
disastrous, is if subpages_mapcount could ever fleetingly appear negative:
but the pte lock (or pmd lock) these rmap functions are called under,
ensures that a last remove cannot race ahead of a first add.

Continue to make an exception for hugetlb (PageHuge) pages, though that
exception can be easily removed by a further commit if necessary: leave
subpages_mapcount 0, don't bother with COMPOUND_MAPPED in its case, just
carry on checking compound_mapcount too in folio_mapped(), page_mapped().

Evidence is that this way goes slightly faster than the previous
implementation in all cases (pmds after ptes now taking around 103ms); and
relieves us of worrying about contention on the bit_spin_lock.

Link: https://lkml.kernel.org/r/3978f3ca-5473-55a7-4e14-efea5968d892@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dan Carpenter <error27@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
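The cascade described above can be pictured with a small stand-alone sketch.
This is a user-space model for illustration only, not the kernel code: the
helpers map_pmd()/map_pte()/unmap_pmd(), the 512-sub-page THP figure and the
use of <stdatomic.h> are assumptions of this note; only the COMPOUND_MAPPED
and SUBPAGES_MAPPED names come from the patch itself.

/*
 * User-space model of the scheme, for illustration only: one atomic word
 * carries both the COMPOUND_MAPPED flag (set while PMD-mapped) and the
 * count of PTE-mapped sub-pages, so map/unmap is a short cascade of atomic
 * ops with no bit_spin_lock.
 */
#include <stdatomic.h>
#include <stdio.h>

#define COMPOUND_MAPPED		0x800000
#define SUBPAGES_MAPPED		(COMPOUND_MAPPED - 1)
#define THP_NR_PAGES		512

static atomic_int compound_mapcount = -1;	/* stored offset by -1, as in struct page */
static atomic_int subpages_mapcount = 0;

/* PMD maps the whole compound page: returns how many pages newly count as mapped */
static int map_pmd(void)
{
	if (atomic_fetch_add(&compound_mapcount, 1) == -1) {	/* -1 -> 0: first PMD map */
		int nr = atomic_fetch_add(&subpages_mapcount, COMPOUND_MAPPED)
			 + COMPOUND_MAPPED;
		return THP_NR_PAGES - (nr & SUBPAGES_MAPPED);
	}
	return 0;
}

/* PTE maps one sub-page: returns 1 if that adds to the "pages mapped" stats */
static int map_pte(void)
{
	int nr = atomic_fetch_add(&subpages_mapcount, 1) + 1;
	return !(nr & COMPOUND_MAPPED);
}

/* PMD unmap, mirroring the add path with the operations reversed */
static int unmap_pmd(void)
{
	if (atomic_fetch_sub(&compound_mapcount, 1) == 0) {	/* 0 -> -1: last PMD unmap */
		int nr = atomic_fetch_sub(&subpages_mapcount, COMPOUND_MAPPED)
			 - COMPOUND_MAPPED;
		return THP_NR_PAGES - (nr & SUBPAGES_MAPPED);
	}
	return 0;
}

int main(void)
{
	printf("pte map:   +%d page(s) mapped\n", map_pte());	/* +1 */
	printf("pmd map:   +%d page(s) mapped\n", map_pmd());	/* +511: one already counted */
	printf("pmd unmap: -%d page(s) mapped\n", unmap_pmd());	/* -511 */
	return 0;
}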
parent be5ef2d9b0
commit 4b51634cd1

5 changed files with 51 additions and 111 deletions
@@ -118,15 +118,14 @@ pages:
     succeeds on tail pages.
 
   - map/unmap of PMD entry for the whole compound page increment/decrement
-    ->compound_mapcount, stored in the first tail page of the compound page.
+    ->compound_mapcount, stored in the first tail page of the compound page;
+    and also increment/decrement ->subpages_mapcount (also in the first tail)
+    by COMPOUND_MAPPED when compound_mapcount goes from -1 to 0 or 0 to -1.
 
   - map/unmap of sub-pages with PTE entry increment/decrement ->_mapcount
     on relevant sub-page of the compound page, and also increment/decrement
     ->subpages_mapcount, stored in first tail page of the compound page, when
     _mapcount goes from -1 to 0 or 0 to -1: counting sub-pages mapped by PTE.
-    In order to have race-free accounting of sub-pages mapped, changes to
-    sub-page ->_mapcount, ->subpages_mapcount and ->compound_mapcount are
-    are all locked by bit_spin_lock of PG_locked in the first tail ->flags.
 
 split_huge_page internally has to distribute the refcounts in the head
 page to the tail pages before clearing all PG_head/tail bits from the page

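Concretely (an illustrative snapshot, not part of the patch): a 2MB THP with
512 sub-pages that is PMD-mapped once while 3 of its sub-pages are also
PTE-mapped ends up with the stored counts below; the short program just
prints how the two readers decompose them.

#include <stdio.h>

#define COMPOUND_MAPPED	0x800000
#define SUBPAGES_MAPPED	(COMPOUND_MAPPED - 1)

int main(void)
{
	/* illustrative values: one PMD mapping plus 3 PTE-mapped sub-pages */
	int compound_mapcount = 0;			/* stored; logical count is value + 1 */
	int subpages_mapcount = COMPOUND_MAPPED + 3;	/* 0x800003 */

	printf("PTE-mapped sub-pages: %d\n", subpages_mapcount & SUBPAGES_MAPPED);	/* 3 */
	printf("PMD-mapped:           %s\n", subpages_mapcount & COMPOUND_MAPPED ? "yes" : "no");
	printf("logical compound map: %d\n", compound_mapcount + 1);			/* 1 */
	return 0;
}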
@@ -836,13 +836,22 @@ static inline int head_compound_mapcount(struct page *head)
 	return atomic_read(compound_mapcount_ptr(head)) + 1;
 }
 
+/*
+ * If a 16GB hugetlb page were mapped by PTEs of all of its 4kB sub-pages,
+ * its subpages_mapcount would be 0x400000: choose the COMPOUND_MAPPED bit
+ * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE).  Hugetlb currently
+ * leaves subpages_mapcount at 0, but avoid surprise if it participates later.
+ */
+#define COMPOUND_MAPPED	0x800000
+#define SUBPAGES_MAPPED	(COMPOUND_MAPPED - 1)
+
 /*
  * Number of sub-pages mapped by PTE, does not include compound mapcount.
  * Must be called only on head of compound page.
  */
 static inline int head_subpages_mapcount(struct page *head)
 {
-	return atomic_read(subpages_mapcount_ptr(head));
+	return atomic_read(subpages_mapcount_ptr(head)) & SUBPAGES_MAPPED;
 }
 
 /*
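The 0x400000 in the comment above is just 16GB / 4kB; a hypothetical
compile-time check (not in the patch) spells out why a flag bit at 0x800000
can never collide with a genuine count of PTE-mapped sub-pages.

#include <assert.h>

/* hypothetical sanity check, not part of the patch */
static_assert((16ULL << 30) / (4 << 10) == 0x400000,
	      "a 16GB page has 0x400000 4kB sub-pages");
static_assert(0x800000 > 0x400000,
	      "COMPOUND_MAPPED sits above any possible sub-page count");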
@@ -902,8 +911,12 @@ static inline int total_mapcount(struct page *page)
 
 static inline bool folio_large_is_mapped(struct folio *folio)
 {
-	return atomic_read(folio_mapcount_ptr(folio)) +
-		atomic_read(folio_subpages_mapcount_ptr(folio)) >= 0;
+	/*
+	 * Reading folio_mapcount_ptr() below could be omitted if hugetlb
+	 * participated in incrementing subpages_mapcount when compound mapped.
+	 */
+	return atomic_read(folio_subpages_mapcount_ptr(folio)) > 0 ||
+		atomic_read(folio_mapcount_ptr(folio)) >= 0;
 }
 
 /**

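Read the hunk above as: for THP a single subpages_mapcount read is enough,
since COMPOUND_MAPPED is folded in whenever the page is PMD-mapped, while
hugetlb (which leaves subpages_mapcount at 0) still needs the
compound_mapcount read. A user-space paraphrase, with names invented here,
not the kernel function:

#include <stdbool.h>
#include <stdio.h>

#define COMPOUND_MAPPED	0x800000

/* paraphrase only: subpages/compound are the raw stored counter values
 * (compound_mapcount is stored offset by -1, so -1 means unmapped) */
static bool large_is_mapped(int subpages, int compound)
{
	return subpages > 0 ||	/* THP: PTE mappings and/or COMPOUND_MAPPED */
	       compound >= 0;	/* hugetlb: does not use COMPOUND_MAPPED */
}

int main(void)
{
	printf("%d\n", large_is_mapped(COMPOUND_MAPPED, 0));	/* THP, PMD-mapped: 1 */
	printf("%d\n", large_is_mapped(0, 0));			/* hugetlb, mapped once: 1 */
	printf("%d\n", large_is_mapped(0, -1));			/* fully unmapped: 0 */
	return 0;
}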
@@ -204,15 +204,14 @@ void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
 void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
 		unsigned long address);
 
-void page_dup_compound_rmap(struct page *page);
+static inline void __page_dup_rmap(struct page *page, bool compound)
+{
+	atomic_inc(compound ? compound_mapcount_ptr(page) : &page->_mapcount);
+}
 
 static inline void page_dup_file_rmap(struct page *page, bool compound)
 {
-	/* Is page being mapped by PTE? */
-	if (likely(!compound))
-		atomic_inc(&page->_mapcount);
-	else
-		page_dup_compound_rmap(page);
+	__page_dup_rmap(page, compound);
 }
 
 /**
@@ -261,7 +260,7 @@ static inline int page_try_dup_anon_rmap(struct page *page, bool compound,
 	 * the page R/O into both processes.
 	 */
 dup:
-	page_dup_file_rmap(page, compound);
+	__page_dup_rmap(page, compound);
 	return 0;
 }
 

@@ -1330,7 +1330,7 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
 			bad_page(page, "nonzero compound_mapcount");
 			goto out;
 		}
-		if (unlikely(head_subpages_mapcount(head_page))) {
+		if (unlikely(atomic_read(subpages_mapcount_ptr(head_page)))) {
 			bad_page(page, "nonzero subpages_mapcount");
 			goto out;
 		}

mm/rmap.c (121 changed lines)
@@ -1085,38 +1085,6 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
 	return page_vma_mkclean_one(&pvmw);
 }
 
-struct compound_mapcounts {
-	unsigned int compound_mapcount;
-	unsigned int subpages_mapcount;
-};
-
-/*
- * lock_compound_mapcounts() first locks, then copies subpages_mapcount and
- * compound_mapcount from head[1].compound_mapcount and subpages_mapcount,
- * converting from struct page's internal representation to logical count
- * (that is, adding 1 to compound_mapcount to hide its offset by -1).
- */
-static void lock_compound_mapcounts(struct page *head,
-		struct compound_mapcounts *local)
-{
-	bit_spin_lock(PG_locked, &head[1].flags);
-	local->compound_mapcount = atomic_read(compound_mapcount_ptr(head)) + 1;
-	local->subpages_mapcount = atomic_read(subpages_mapcount_ptr(head));
-}
-
-/*
- * After caller has updated subpage._mapcount, local subpages_mapcount and
- * local compound_mapcount, as necessary, unlock_compound_mapcounts() converts
- * and copies them back to the compound head[1] fields, and then unlocks.
- */
-static void unlock_compound_mapcounts(struct page *head,
-		struct compound_mapcounts *local)
-{
-	atomic_set(compound_mapcount_ptr(head), local->compound_mapcount - 1);
-	atomic_set(subpages_mapcount_ptr(head), local->subpages_mapcount);
-	bit_spin_unlock(PG_locked, &head[1].flags);
-}
-
 int total_compound_mapcount(struct page *head)
 {
 	int mapcount = head_compound_mapcount(head);
@@ -1140,34 +1108,6 @@ int total_compound_mapcount(struct page *head)
 	return mapcount;
 }
 
-/*
- * page_dup_compound_rmap(), used when copying mm,
- * provides a simple example of using lock_ and unlock_compound_mapcounts().
- */
-void page_dup_compound_rmap(struct page *head)
-{
-	struct compound_mapcounts mapcounts;
-
-	/*
-	 * Hugetlb pages could use lock_compound_mapcounts(), like THPs do;
-	 * but at present they are still being managed by atomic operations:
-	 * which are likely to be somewhat faster, so don't rush to convert
-	 * them over without evaluating the effect.
-	 *
-	 * Note that hugetlb does not call page_add_file_rmap():
-	 * here is where hugetlb shared page mapcount is raised.
-	 */
-	if (PageHuge(head)) {
-		atomic_inc(compound_mapcount_ptr(head));
-	} else if (PageTransHuge(head)) {
-		/* That test is redundant: it's for safety or to optimize out */
-
-		lock_compound_mapcounts(head, &mapcounts);
-		mapcounts.compound_mapcount++;
-		unlock_compound_mapcounts(head, &mapcounts);
-	}
-}
-
 /**
  * page_move_anon_rmap - move a page to our anon_vma
  * @page:	the page to move to our anon_vma
@@ -1277,7 +1217,7 @@ static void __page_check_anon_rmap(struct page *page,
 void page_add_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address, rmap_t flags)
 {
-	struct compound_mapcounts mapcounts;
+	atomic_t *mapped;
 	int nr = 0, nr_pmdmapped = 0;
 	bool compound = flags & RMAP_COMPOUND;
 	bool first = true;
@@ -1290,24 +1230,20 @@ void page_add_anon_rmap(struct page *page,
 		first = atomic_inc_and_test(&page->_mapcount);
 		nr = first;
 		if (first && PageCompound(page)) {
-			struct page *head = compound_head(page);
-
-			lock_compound_mapcounts(head, &mapcounts);
-			mapcounts.subpages_mapcount++;
-			nr = !mapcounts.compound_mapcount;
-			unlock_compound_mapcounts(head, &mapcounts);
+			mapped = subpages_mapcount_ptr(compound_head(page));
+			nr = atomic_inc_return_relaxed(mapped);
+			nr = !(nr & COMPOUND_MAPPED);
 		}
 	} else if (PageTransHuge(page)) {
 		/* That test is redundant: it's for safety or to optimize out */
 
-		lock_compound_mapcounts(page, &mapcounts);
-		first = !mapcounts.compound_mapcount;
-		mapcounts.compound_mapcount++;
+		first = atomic_inc_and_test(compound_mapcount_ptr(page));
 		if (first) {
+			mapped = subpages_mapcount_ptr(page);
+			nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
 			nr_pmdmapped = thp_nr_pages(page);
-			nr = nr_pmdmapped - mapcounts.subpages_mapcount;
+			nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED);
 		}
-		unlock_compound_mapcounts(page, &mapcounts);
 	}
 
 	VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page);
@@ -1360,6 +1296,7 @@ void page_add_new_anon_rmap(struct page *page,
 		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 		/* increment count (starts at -1) */
 		atomic_set(compound_mapcount_ptr(page), 0);
+		atomic_set(subpages_mapcount_ptr(page), COMPOUND_MAPPED);
 		nr = thp_nr_pages(page);
 		__mod_lruvec_page_state(page, NR_ANON_THPS, nr);
 	}
@@ -1379,7 +1316,7 @@ void page_add_file_rmap(struct page *page,
 void page_add_file_rmap(struct page *page,
 	struct vm_area_struct *vma, bool compound)
 {
-	struct compound_mapcounts mapcounts;
+	atomic_t *mapped;
 	int nr = 0, nr_pmdmapped = 0;
 	bool first;
 
@@ -1391,24 +1328,20 @@ void page_add_file_rmap(struct page *page,
 		first = atomic_inc_and_test(&page->_mapcount);
 		nr = first;
 		if (first && PageCompound(page)) {
-			struct page *head = compound_head(page);
-
-			lock_compound_mapcounts(head, &mapcounts);
-			mapcounts.subpages_mapcount++;
-			nr = !mapcounts.compound_mapcount;
-			unlock_compound_mapcounts(head, &mapcounts);
+			mapped = subpages_mapcount_ptr(compound_head(page));
+			nr = atomic_inc_return_relaxed(mapped);
+			nr = !(nr & COMPOUND_MAPPED);
 		}
 	} else if (PageTransHuge(page)) {
 		/* That test is redundant: it's for safety or to optimize out */
 
-		lock_compound_mapcounts(page, &mapcounts);
-		first = !mapcounts.compound_mapcount;
-		mapcounts.compound_mapcount++;
+		first = atomic_inc_and_test(compound_mapcount_ptr(page));
 		if (first) {
+			mapped = subpages_mapcount_ptr(page);
+			nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
 			nr_pmdmapped = thp_nr_pages(page);
-			nr = nr_pmdmapped - mapcounts.subpages_mapcount;
+			nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED);
 		}
-		unlock_compound_mapcounts(page, &mapcounts);
 	}
 
 	if (nr_pmdmapped)
@@ -1432,7 +1365,7 @@ void page_add_file_rmap(struct page *page,
 void page_remove_rmap(struct page *page,
 	struct vm_area_struct *vma, bool compound)
 {
-	struct compound_mapcounts mapcounts;
+	atomic_t *mapped;
 	int nr = 0, nr_pmdmapped = 0;
 	bool last;
 
@@ -1452,24 +1385,20 @@ void page_remove_rmap(struct page *page,
 		last = atomic_add_negative(-1, &page->_mapcount);
 		nr = last;
 		if (last && PageCompound(page)) {
-			struct page *head = compound_head(page);
-
-			lock_compound_mapcounts(head, &mapcounts);
-			mapcounts.subpages_mapcount--;
-			nr = !mapcounts.compound_mapcount;
-			unlock_compound_mapcounts(head, &mapcounts);
+			mapped = subpages_mapcount_ptr(compound_head(page));
+			nr = atomic_dec_return_relaxed(mapped);
+			nr = !(nr & COMPOUND_MAPPED);
 		}
 	} else if (PageTransHuge(page)) {
 		/* That test is redundant: it's for safety or to optimize out */
 
-		lock_compound_mapcounts(page, &mapcounts);
-		mapcounts.compound_mapcount--;
-		last = !mapcounts.compound_mapcount;
+		last = atomic_add_negative(-1, compound_mapcount_ptr(page));
 		if (last) {
+			mapped = subpages_mapcount_ptr(page);
+			nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
 			nr_pmdmapped = thp_nr_pages(page);
-			nr = nr_pmdmapped - mapcounts.subpages_mapcount;
+			nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED);
 		}
-		unlock_compound_mapcounts(page, &mapcounts);
 	}
 
 	if (nr_pmdmapped) {