forked from mirrors/linux
		
	mm: Make compound_pincount always available
Move compound_pincount from the third page to the second page, which means it's available for all compound pages. That lets us delete hpage_pincount_available().

On 32-bit systems, there isn't enough space for both compound_pincount and compound_nr in the second page (it would collide with page->private, which is in use for pages in the swap cache), so revert the optimisation of storing both compound_order and compound_nr on 32-bit systems.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
This commit is contained in:
		
							parent
							
								
									6315d8a23c
								
							
						
					
					
						commit
						5232c63f46
					
				
					 8 changed files with 42 additions and 51 deletions
				
			
		|  | @ -55,18 +55,18 @@ flags the caller provides. The caller is required to pass in a non-null struct | |||
| pages* array, and the function then pins pages by incrementing each by a special | ||||
| value: GUP_PIN_COUNTING_BIAS. | ||||
| 
 | ||||
| For huge pages (and in fact, any compound page of more than 2 pages), the | ||||
| GUP_PIN_COUNTING_BIAS scheme is not used. Instead, an exact form of pin counting | ||||
| is achieved, by using the 3rd struct page in the compound page. A new struct | ||||
| page field, hpage_pinned_refcount, has been added in order to support this. | ||||
| For compound pages, the GUP_PIN_COUNTING_BIAS scheme is not used. Instead, | ||||
| an exact form of pin counting is achieved, by using the 2nd struct page | ||||
| in the compound page. A new struct page field, compound_pincount, has | ||||
| been added in order to support this. | ||||
| 
 | ||||
| This approach for compound pages avoids the counting upper limit problems that | ||||
| are discussed below. Those limitations would have been aggravated severely by | ||||
| huge pages, because each tail page adds a refcount to the head page. And in | ||||
| fact, testing revealed that, without a separate hpage_pinned_refcount field, | ||||
| fact, testing revealed that, without a separate compound_pincount field, | ||||
| page overflows were seen in some huge page stress tests. | ||||
| 
 | ||||
| This also means that huge pages and compound pages (of order > 1) do not suffer | ||||
| This also means that huge pages and compound pages do not suffer | ||||
| from the false positives problem that is mentioned below.:: | ||||
| 
 | ||||
|  Function | ||||
|  | @ -264,9 +264,9 @@ place.) | |||
| Other diagnostics | ||||
| ================= | ||||
| 
 | ||||
| dump_page() has been enhanced slightly, to handle these new counting fields, and | ||||
| to better report on compound pages in general. Specifically, for compound pages | ||||
| with order > 1, the exact (hpage_pinned_refcount) pincount is reported. | ||||
| dump_page() has been enhanced slightly, to handle these new counting | ||||
| fields, and to better report on compound pages in general. Specifically, | ||||
| for compound pages, the exact (compound_pincount) pincount is reported. | ||||
| 
 | ||||
| References | ||||
| ========== | ||||
|  |  | |||
|  | @ -887,17 +887,6 @@ static inline void destroy_compound_page(struct page *page) | |||
| 	compound_page_dtors[page[1].compound_dtor](page); | ||||
| } | ||||
| 
 | ||||
| static inline bool hpage_pincount_available(struct page *page) | ||||
| { | ||||
| 	/*
 | ||||
| 	 * Can the page->hpage_pinned_refcount field be used? That field is in | ||||
| 	 * the 3rd page of the compound page, so the smallest (2-page) compound | ||||
| 	 * pages cannot support it. | ||||
| 	 */ | ||||
| 	page = compound_head(page); | ||||
| 	return PageCompound(page) && compound_order(page) > 1; | ||||
| } | ||||
| 
 | ||||
| static inline int head_compound_pincount(struct page *head) | ||||
| { | ||||
| 	return atomic_read(compound_pincount_ptr(head)); | ||||
|  | @ -905,7 +894,7 @@ static inline int head_compound_pincount(struct page *head) | |||
| 
 | ||||
| static inline int compound_pincount(struct page *page) | ||||
| { | ||||
| 	VM_BUG_ON_PAGE(!hpage_pincount_available(page), page); | ||||
| 	VM_BUG_ON_PAGE(!PageCompound(page), page); | ||||
| 	page = compound_head(page); | ||||
| 	return head_compound_pincount(page); | ||||
| } | ||||
|  | @ -913,7 +902,9 @@ static inline int compound_pincount(struct page *page) | |||
| static inline void set_compound_order(struct page *page, unsigned int order) | ||||
| { | ||||
| 	page[1].compound_order = order; | ||||
| #ifdef CONFIG_64BIT | ||||
| 	page[1].compound_nr = 1U << order; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| /* Returns the number of pages in this potentially compound page. */ | ||||
|  | @ -921,7 +912,11 @@ static inline unsigned long compound_nr(struct page *page) | |||
| { | ||||
| 	if (!PageHead(page)) | ||||
| 		return 1; | ||||
| #ifdef CONFIG_64BIT | ||||
| 	return page[1].compound_nr; | ||||
| #else | ||||
| 	return 1UL << compound_order(page); | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| /* Returns the number of bytes in this potentially compound page. */ | ||||
|  | @ -1269,7 +1264,7 @@ void unpin_user_pages(struct page **pages, unsigned long npages); | |||
|  */ | ||||
| static inline bool page_maybe_dma_pinned(struct page *page) | ||||
| { | ||||
| 	if (hpage_pincount_available(page)) | ||||
| 	if (PageCompound(page)) | ||||
| 		return compound_pincount(page) > 0; | ||||
| 
 | ||||
| 	/*
 | ||||
|  |  | |||
|  | @ -135,11 +135,14 @@ struct page { | |||
| 			unsigned char compound_dtor; | ||||
| 			unsigned char compound_order; | ||||
| 			atomic_t compound_mapcount; | ||||
| 			atomic_t compound_pincount; | ||||
| #ifdef CONFIG_64BIT | ||||
| 			unsigned int compound_nr; /* 1 << compound_order */ | ||||
| #endif | ||||
| 		}; | ||||
| 		struct {	/* Second tail page of compound page */ | ||||
| 			unsigned long _compound_pad_1;	/* compound_head */ | ||||
| 			atomic_t hpage_pinned_refcount; | ||||
| 			unsigned long _compound_pad_2; | ||||
| 			/* For both global and memcg */ | ||||
| 			struct list_head deferred_list; | ||||
| 		}; | ||||
|  | @ -300,7 +303,7 @@ static inline atomic_t *compound_mapcount_ptr(struct page *page) | |||
| 
 | ||||
| static inline atomic_t *compound_pincount_ptr(struct page *page) | ||||
| { | ||||
| 	return &page[2].hpage_pinned_refcount; | ||||
| 	return &page[1].compound_pincount; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  |  | |||
							
								
								
									
										14
									
								
								mm/debug.c
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								mm/debug.c
									
									
									
									
									
								
							|  | @ -92,16 +92,10 @@ static void __dump_page(struct page *page) | |||
| 			page, page_ref_count(head), mapcount, mapping, | ||||
| 			page_to_pgoff(page), page_to_pfn(page)); | ||||
| 	if (compound) { | ||||
| 		if (hpage_pincount_available(page)) { | ||||
| 			pr_warn("head:%p order:%u compound_mapcount:%d compound_pincount:%d\n", | ||||
| 					head, compound_order(head), | ||||
| 					head_compound_mapcount(head), | ||||
| 					head_compound_pincount(head)); | ||||
| 		} else { | ||||
| 			pr_warn("head:%p order:%u compound_mapcount:%d\n", | ||||
| 					head, compound_order(head), | ||||
| 					head_compound_mapcount(head)); | ||||
| 		} | ||||
| 		pr_warn("head:%p order:%u compound_mapcount:%d compound_pincount:%d\n", | ||||
| 				head, compound_order(head), | ||||
| 				head_compound_mapcount(head), | ||||
| 				head_compound_pincount(head)); | ||||
| 	} | ||||
| 
 | ||||
| #ifdef CONFIG_MEMCG | ||||
|  |  | |||
							
								
								
									
										20
									
								
								mm/gup.c
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								mm/gup.c
									
									
									
									
									
								
							|  | @ -99,12 +99,11 @@ static inline struct page *try_get_compound_head(struct page *page, int refs) | |||
|  * | ||||
|  *    FOLL_GET: page's refcount will be incremented by @refs. | ||||
|  * | ||||
|  *    FOLL_PIN on compound pages that are > two pages long: page's refcount will | ||||
|  *    be incremented by @refs, and page[2].hpage_pinned_refcount will be | ||||
|  *    incremented by @refs * GUP_PIN_COUNTING_BIAS. | ||||
|  *    FOLL_PIN on compound pages: page's refcount will be incremented by | ||||
|  *    @refs, and page[1].compound_pincount will be incremented by @refs. | ||||
|  * | ||||
|  *    FOLL_PIN on normal pages, or compound pages that are two pages long: | ||||
|  *    page's refcount will be incremented by @refs * GUP_PIN_COUNTING_BIAS. | ||||
|  *    FOLL_PIN on normal pages: page's refcount will be incremented by | ||||
|  *    @refs * GUP_PIN_COUNTING_BIAS. | ||||
|  * | ||||
|  * Return: head page (with refcount appropriately incremented) for success, or | ||||
|  * NULL upon failure. If neither FOLL_GET nor FOLL_PIN was set, that's | ||||
|  | @ -135,16 +134,15 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page, | |||
| 			return NULL; | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * When pinning a compound page of order > 1 (which is | ||||
| 		 * what hpage_pincount_available() checks for), use an | ||||
| 		 * exact count to track it. | ||||
| 		 * When pinning a compound page, use an exact count to | ||||
| 		 * track it. | ||||
| 		 * | ||||
| 		 * However, be sure to *also* increment the normal page | ||||
| 		 * refcount field at least once, so that the page really | ||||
| 		 * is pinned.  That's why the refcount from the earlier | ||||
| 		 * try_get_compound_head() is left intact. | ||||
| 		 */ | ||||
| 		if (hpage_pincount_available(page)) | ||||
| 		if (PageHead(page)) | ||||
| 			atomic_add(refs, compound_pincount_ptr(page)); | ||||
| 		else | ||||
| 			page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1)); | ||||
|  | @ -166,7 +164,7 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags) | |||
| 	if (flags & FOLL_PIN) { | ||||
| 		mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED, | ||||
| 				    refs); | ||||
| 		if (hpage_pincount_available(page)) | ||||
| 		if (PageHead(page)) | ||||
| 			atomic_sub(refs, compound_pincount_ptr(page)); | ||||
| 		else | ||||
| 			refs *= GUP_PIN_COUNTING_BIAS; | ||||
|  | @ -211,7 +209,7 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags) | |||
| 		 * increment the normal page refcount field at least once, | ||||
| 		 * so that the page really is pinned. | ||||
| 		 */ | ||||
| 		if (hpage_pincount_available(page)) { | ||||
| 		if (PageHead(page)) { | ||||
| 			page_ref_add(page, 1); | ||||
| 			atomic_add(1, compound_pincount_ptr(page)); | ||||
| 		} else { | ||||
|  |  | |||
|  | @ -1320,7 +1320,9 @@ static void __destroy_compound_gigantic_page(struct page *page, | |||
| 	} | ||||
| 
 | ||||
| 	set_compound_order(page, 0); | ||||
| #ifdef CONFIG_64BIT | ||||
| 	page[1].compound_nr = 0; | ||||
| #endif | ||||
| 	__ClearPageHead(page); | ||||
| } | ||||
| 
 | ||||
|  | @ -1812,7 +1814,9 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, | |||
| 	for (; j < nr_pages; j++, p = mem_map_next(p, page, j)) | ||||
| 		__ClearPageReserved(p); | ||||
| 	set_compound_order(page, 0); | ||||
| #ifdef CONFIG_64BIT | ||||
| 	page[1].compound_nr = 0; | ||||
| #endif | ||||
| 	__ClearPageHead(page); | ||||
| 	return false; | ||||
| } | ||||
|  |  | |||
|  | @ -734,8 +734,7 @@ static void prep_compound_head(struct page *page, unsigned int order) | |||
| 	set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); | ||||
| 	set_compound_order(page, order); | ||||
| 	atomic_set(compound_mapcount_ptr(page), -1); | ||||
| 	if (hpage_pincount_available(page)) | ||||
| 		atomic_set(compound_pincount_ptr(page), 0); | ||||
| 	atomic_set(compound_pincount_ptr(page), 0); | ||||
| } | ||||
| 
 | ||||
| static void prep_compound_tail(struct page *head, int tail_idx) | ||||
|  |  | |||
|  | @ -1219,8 +1219,7 @@ void page_add_new_anon_rmap(struct page *page, | |||
| 		VM_BUG_ON_PAGE(!PageTransHuge(page), page); | ||||
| 		/* increment count (starts at -1) */ | ||||
| 		atomic_set(compound_mapcount_ptr(page), 0); | ||||
| 		if (hpage_pincount_available(page)) | ||||
| 			atomic_set(compound_pincount_ptr(page), 0); | ||||
| 		atomic_set(compound_pincount_ptr(page), 0); | ||||
| 
 | ||||
| 		__mod_lruvec_page_state(page, NR_ANON_THPS, nr); | ||||
| 	} else { | ||||
|  | @ -2353,8 +2352,7 @@ void hugepage_add_new_anon_rmap(struct page *page, | |||
| { | ||||
| 	BUG_ON(address < vma->vm_start || address >= vma->vm_end); | ||||
| 	atomic_set(compound_mapcount_ptr(page), 0); | ||||
| 	if (hpage_pincount_available(page)) | ||||
| 		atomic_set(compound_pincount_ptr(page), 0); | ||||
| 	atomic_set(compound_pincount_ptr(page), 0); | ||||
| 
 | ||||
| 	__page_set_anon_rmap(page, vma, address, 1); | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Matthew Wilcox (Oracle)
						Matthew Wilcox (Oracle)