forked from mirrors/linux
		
	mm: Make compound_pincount always available
Move compound_pincount from the third page to the second page, which means it's available for all compound pages. That lets us delete hpage_pincount_available().

On 32-bit systems, there isn't enough space for both compound_pincount and compound_nr in the second page (it would collide with page->private, which is in use for pages in the swap cache), so revert the optimisation of storing both compound_order and compound_nr on 32-bit systems.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
This commit is contained in:
		
							parent
							
								
									6315d8a23c
								
							
						
					
					
						commit
						5232c63f46
					
				
					 8 changed files with 42 additions and 51 deletions
				
			
		|  | @ -55,18 +55,18 @@ flags the caller provides. The caller is required to pass in a non-null struct | ||||||
| pages* array, and the function then pins pages by incrementing each by a special | pages* array, and the function then pins pages by incrementing each by a special | ||||||
| value: GUP_PIN_COUNTING_BIAS. | value: GUP_PIN_COUNTING_BIAS. | ||||||
| 
 | 
 | ||||||
| For huge pages (and in fact, any compound page of more than 2 pages), the | For compound pages, the GUP_PIN_COUNTING_BIAS scheme is not used. Instead, | ||||||
| GUP_PIN_COUNTING_BIAS scheme is not used. Instead, an exact form of pin counting | an exact form of pin counting is achieved, by using the 2nd struct page | ||||||
| is achieved, by using the 3rd struct page in the compound page. A new struct | in the compound page. A new struct page field, compound_pincount, has | ||||||
| page field, hpage_pinned_refcount, has been added in order to support this. | been added in order to support this. | ||||||
| 
 | 
 | ||||||
| This approach for compound pages avoids the counting upper limit problems that | This approach for compound pages avoids the counting upper limit problems that | ||||||
| are discussed below. Those limitations would have been aggravated severely by | are discussed below. Those limitations would have been aggravated severely by | ||||||
| huge pages, because each tail page adds a refcount to the head page. And in | huge pages, because each tail page adds a refcount to the head page. And in | ||||||
| fact, testing revealed that, without a separate hpage_pinned_refcount field, | fact, testing revealed that, without a separate compound_pincount field, | ||||||
| page overflows were seen in some huge page stress tests. | page overflows were seen in some huge page stress tests. | ||||||
| 
 | 
 | ||||||
| This also means that huge pages and compound pages (of order > 1) do not suffer | This also means that huge pages and compound pages do not suffer | ||||||
| from the false positives problem that is mentioned below.:: | from the false positives problem that is mentioned below.:: | ||||||
| 
 | 
 | ||||||
|  Function |  Function | ||||||
|  | @ -264,9 +264,9 @@ place.) | ||||||
| Other diagnostics | Other diagnostics | ||||||
| ================= | ================= | ||||||
| 
 | 
 | ||||||
| dump_page() has been enhanced slightly, to handle these new counting fields, and | dump_page() has been enhanced slightly, to handle these new counting | ||||||
| to better report on compound pages in general. Specifically, for compound pages | fields, and to better report on compound pages in general. Specifically, | ||||||
| with order > 1, the exact (hpage_pinned_refcount) pincount is reported. | for compound pages, the exact (compound_pincount) pincount is reported. | ||||||
| 
 | 
 | ||||||
| References | References | ||||||
| ========== | ========== | ||||||
|  |  | ||||||
|  | @ -887,17 +887,6 @@ static inline void destroy_compound_page(struct page *page) | ||||||
| 	compound_page_dtors[page[1].compound_dtor](page); | 	compound_page_dtors[page[1].compound_dtor](page); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline bool hpage_pincount_available(struct page *page) |  | ||||||
| { |  | ||||||
| 	/*
 |  | ||||||
| 	 * Can the page->hpage_pinned_refcount field be used? That field is in |  | ||||||
| 	 * the 3rd page of the compound page, so the smallest (2-page) compound |  | ||||||
| 	 * pages cannot support it. |  | ||||||
| 	 */ |  | ||||||
| 	page = compound_head(page); |  | ||||||
| 	return PageCompound(page) && compound_order(page) > 1; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline int head_compound_pincount(struct page *head) | static inline int head_compound_pincount(struct page *head) | ||||||
| { | { | ||||||
| 	return atomic_read(compound_pincount_ptr(head)); | 	return atomic_read(compound_pincount_ptr(head)); | ||||||
|  | @ -905,7 +894,7 @@ static inline int head_compound_pincount(struct page *head) | ||||||
| 
 | 
 | ||||||
| static inline int compound_pincount(struct page *page) | static inline int compound_pincount(struct page *page) | ||||||
| { | { | ||||||
| 	VM_BUG_ON_PAGE(!hpage_pincount_available(page), page); | 	VM_BUG_ON_PAGE(!PageCompound(page), page); | ||||||
| 	page = compound_head(page); | 	page = compound_head(page); | ||||||
| 	return head_compound_pincount(page); | 	return head_compound_pincount(page); | ||||||
| } | } | ||||||
|  | @ -913,7 +902,9 @@ static inline int compound_pincount(struct page *page) | ||||||
| static inline void set_compound_order(struct page *page, unsigned int order) | static inline void set_compound_order(struct page *page, unsigned int order) | ||||||
| { | { | ||||||
| 	page[1].compound_order = order; | 	page[1].compound_order = order; | ||||||
|  | #ifdef CONFIG_64BIT | ||||||
| 	page[1].compound_nr = 1U << order; | 	page[1].compound_nr = 1U << order; | ||||||
|  | #endif | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* Returns the number of pages in this potentially compound page. */ | /* Returns the number of pages in this potentially compound page. */ | ||||||
|  | @ -921,7 +912,11 @@ static inline unsigned long compound_nr(struct page *page) | ||||||
| { | { | ||||||
| 	if (!PageHead(page)) | 	if (!PageHead(page)) | ||||||
| 		return 1; | 		return 1; | ||||||
|  | #ifdef CONFIG_64BIT | ||||||
| 	return page[1].compound_nr; | 	return page[1].compound_nr; | ||||||
|  | #else | ||||||
|  | 	return 1UL << compound_order(page); | ||||||
|  | #endif | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* Returns the number of bytes in this potentially compound page. */ | /* Returns the number of bytes in this potentially compound page. */ | ||||||
|  | @ -1269,7 +1264,7 @@ void unpin_user_pages(struct page **pages, unsigned long npages); | ||||||
|  */ |  */ | ||||||
| static inline bool page_maybe_dma_pinned(struct page *page) | static inline bool page_maybe_dma_pinned(struct page *page) | ||||||
| { | { | ||||||
| 	if (hpage_pincount_available(page)) | 	if (PageCompound(page)) | ||||||
| 		return compound_pincount(page) > 0; | 		return compound_pincount(page) > 0; | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
|  |  | ||||||
|  | @ -135,11 +135,14 @@ struct page { | ||||||
| 			unsigned char compound_dtor; | 			unsigned char compound_dtor; | ||||||
| 			unsigned char compound_order; | 			unsigned char compound_order; | ||||||
| 			atomic_t compound_mapcount; | 			atomic_t compound_mapcount; | ||||||
|  | 			atomic_t compound_pincount; | ||||||
|  | #ifdef CONFIG_64BIT | ||||||
| 			unsigned int compound_nr; /* 1 << compound_order */ | 			unsigned int compound_nr; /* 1 << compound_order */ | ||||||
|  | #endif | ||||||
| 		}; | 		}; | ||||||
| 		struct {	/* Second tail page of compound page */ | 		struct {	/* Second tail page of compound page */ | ||||||
| 			unsigned long _compound_pad_1;	/* compound_head */ | 			unsigned long _compound_pad_1;	/* compound_head */ | ||||||
| 			atomic_t hpage_pinned_refcount; | 			unsigned long _compound_pad_2; | ||||||
| 			/* For both global and memcg */ | 			/* For both global and memcg */ | ||||||
| 			struct list_head deferred_list; | 			struct list_head deferred_list; | ||||||
| 		}; | 		}; | ||||||
|  | @ -300,7 +303,7 @@ static inline atomic_t *compound_mapcount_ptr(struct page *page) | ||||||
| 
 | 
 | ||||||
| static inline atomic_t *compound_pincount_ptr(struct page *page) | static inline atomic_t *compound_pincount_ptr(struct page *page) | ||||||
| { | { | ||||||
| 	return &page[2].hpage_pinned_refcount; | 	return &page[1].compound_pincount; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  |  | ||||||
							
								
								
									
										14
									
								
								mm/debug.c
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								mm/debug.c
									
									
									
									
									
								
							|  | @ -92,16 +92,10 @@ static void __dump_page(struct page *page) | ||||||
| 			page, page_ref_count(head), mapcount, mapping, | 			page, page_ref_count(head), mapcount, mapping, | ||||||
| 			page_to_pgoff(page), page_to_pfn(page)); | 			page_to_pgoff(page), page_to_pfn(page)); | ||||||
| 	if (compound) { | 	if (compound) { | ||||||
| 		if (hpage_pincount_available(page)) { | 		pr_warn("head:%p order:%u compound_mapcount:%d compound_pincount:%d\n", | ||||||
| 			pr_warn("head:%p order:%u compound_mapcount:%d compound_pincount:%d\n", | 				head, compound_order(head), | ||||||
| 					head, compound_order(head), | 				head_compound_mapcount(head), | ||||||
| 					head_compound_mapcount(head), | 				head_compound_pincount(head)); | ||||||
| 					head_compound_pincount(head)); |  | ||||||
| 		} else { |  | ||||||
| 			pr_warn("head:%p order:%u compound_mapcount:%d\n", |  | ||||||
| 					head, compound_order(head), |  | ||||||
| 					head_compound_mapcount(head)); |  | ||||||
| 		} |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| #ifdef CONFIG_MEMCG | #ifdef CONFIG_MEMCG | ||||||
|  |  | ||||||
							
								
								
									
										20
									
								
								mm/gup.c
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								mm/gup.c
									
									
									
									
									
								
							|  | @ -99,12 +99,11 @@ static inline struct page *try_get_compound_head(struct page *page, int refs) | ||||||
|  * |  * | ||||||
|  *    FOLL_GET: page's refcount will be incremented by @refs. |  *    FOLL_GET: page's refcount will be incremented by @refs. | ||||||
|  * |  * | ||||||
|  *    FOLL_PIN on compound pages that are > two pages long: page's refcount will |  *    FOLL_PIN on compound pages: page's refcount will be incremented by | ||||||
|  *    be incremented by @refs, and page[2].hpage_pinned_refcount will be |  *    @refs, and page[1].compound_pincount will be incremented by @refs. | ||||||
|  *    incremented by @refs * GUP_PIN_COUNTING_BIAS. |  | ||||||
|  * |  * | ||||||
|  *    FOLL_PIN on normal pages, or compound pages that are two pages long: |  *    FOLL_PIN on normal pages: page's refcount will be incremented by | ||||||
|  *    page's refcount will be incremented by @refs * GUP_PIN_COUNTING_BIAS. |  *    @refs * GUP_PIN_COUNTING_BIAS. | ||||||
|  * |  * | ||||||
|  * Return: head page (with refcount appropriately incremented) for success, or |  * Return: head page (with refcount appropriately incremented) for success, or | ||||||
|  * NULL upon failure. If neither FOLL_GET nor FOLL_PIN was set, that's |  * NULL upon failure. If neither FOLL_GET nor FOLL_PIN was set, that's | ||||||
|  | @ -135,16 +134,15 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page, | ||||||
| 			return NULL; | 			return NULL; | ||||||
| 
 | 
 | ||||||
| 		/*
 | 		/*
 | ||||||
| 		 * When pinning a compound page of order > 1 (which is | 		 * When pinning a compound page, use an exact count to | ||||||
| 		 * what hpage_pincount_available() checks for), use an | 		 * track it. | ||||||
| 		 * exact count to track it. |  | ||||||
| 		 * | 		 * | ||||||
| 		 * However, be sure to *also* increment the normal page | 		 * However, be sure to *also* increment the normal page | ||||||
| 		 * refcount field at least once, so that the page really | 		 * refcount field at least once, so that the page really | ||||||
| 		 * is pinned.  That's why the refcount from the earlier | 		 * is pinned.  That's why the refcount from the earlier | ||||||
| 		 * try_get_compound_head() is left intact. | 		 * try_get_compound_head() is left intact. | ||||||
| 		 */ | 		 */ | ||||||
| 		if (hpage_pincount_available(page)) | 		if (PageHead(page)) | ||||||
| 			atomic_add(refs, compound_pincount_ptr(page)); | 			atomic_add(refs, compound_pincount_ptr(page)); | ||||||
| 		else | 		else | ||||||
| 			page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1)); | 			page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1)); | ||||||
|  | @ -166,7 +164,7 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags) | ||||||
| 	if (flags & FOLL_PIN) { | 	if (flags & FOLL_PIN) { | ||||||
| 		mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED, | 		mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED, | ||||||
| 				    refs); | 				    refs); | ||||||
| 		if (hpage_pincount_available(page)) | 		if (PageHead(page)) | ||||||
| 			atomic_sub(refs, compound_pincount_ptr(page)); | 			atomic_sub(refs, compound_pincount_ptr(page)); | ||||||
| 		else | 		else | ||||||
| 			refs *= GUP_PIN_COUNTING_BIAS; | 			refs *= GUP_PIN_COUNTING_BIAS; | ||||||
|  | @ -211,7 +209,7 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags) | ||||||
| 		 * increment the normal page refcount field at least once, | 		 * increment the normal page refcount field at least once, | ||||||
| 		 * so that the page really is pinned. | 		 * so that the page really is pinned. | ||||||
| 		 */ | 		 */ | ||||||
| 		if (hpage_pincount_available(page)) { | 		if (PageHead(page)) { | ||||||
| 			page_ref_add(page, 1); | 			page_ref_add(page, 1); | ||||||
| 			atomic_add(1, compound_pincount_ptr(page)); | 			atomic_add(1, compound_pincount_ptr(page)); | ||||||
| 		} else { | 		} else { | ||||||
|  |  | ||||||
|  | @ -1320,7 +1320,9 @@ static void __destroy_compound_gigantic_page(struct page *page, | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	set_compound_order(page, 0); | 	set_compound_order(page, 0); | ||||||
|  | #ifdef CONFIG_64BIT | ||||||
| 	page[1].compound_nr = 0; | 	page[1].compound_nr = 0; | ||||||
|  | #endif | ||||||
| 	__ClearPageHead(page); | 	__ClearPageHead(page); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -1812,7 +1814,9 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, | ||||||
| 	for (; j < nr_pages; j++, p = mem_map_next(p, page, j)) | 	for (; j < nr_pages; j++, p = mem_map_next(p, page, j)) | ||||||
| 		__ClearPageReserved(p); | 		__ClearPageReserved(p); | ||||||
| 	set_compound_order(page, 0); | 	set_compound_order(page, 0); | ||||||
|  | #ifdef CONFIG_64BIT | ||||||
| 	page[1].compound_nr = 0; | 	page[1].compound_nr = 0; | ||||||
|  | #endif | ||||||
| 	__ClearPageHead(page); | 	__ClearPageHead(page); | ||||||
| 	return false; | 	return false; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -734,8 +734,7 @@ static void prep_compound_head(struct page *page, unsigned int order) | ||||||
| 	set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); | 	set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); | ||||||
| 	set_compound_order(page, order); | 	set_compound_order(page, order); | ||||||
| 	atomic_set(compound_mapcount_ptr(page), -1); | 	atomic_set(compound_mapcount_ptr(page), -1); | ||||||
| 	if (hpage_pincount_available(page)) | 	atomic_set(compound_pincount_ptr(page), 0); | ||||||
| 		atomic_set(compound_pincount_ptr(page), 0); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void prep_compound_tail(struct page *head, int tail_idx) | static void prep_compound_tail(struct page *head, int tail_idx) | ||||||
|  |  | ||||||
|  | @ -1219,8 +1219,7 @@ void page_add_new_anon_rmap(struct page *page, | ||||||
| 		VM_BUG_ON_PAGE(!PageTransHuge(page), page); | 		VM_BUG_ON_PAGE(!PageTransHuge(page), page); | ||||||
| 		/* increment count (starts at -1) */ | 		/* increment count (starts at -1) */ | ||||||
| 		atomic_set(compound_mapcount_ptr(page), 0); | 		atomic_set(compound_mapcount_ptr(page), 0); | ||||||
| 		if (hpage_pincount_available(page)) | 		atomic_set(compound_pincount_ptr(page), 0); | ||||||
| 			atomic_set(compound_pincount_ptr(page), 0); |  | ||||||
| 
 | 
 | ||||||
| 		__mod_lruvec_page_state(page, NR_ANON_THPS, nr); | 		__mod_lruvec_page_state(page, NR_ANON_THPS, nr); | ||||||
| 	} else { | 	} else { | ||||||
|  | @ -2353,8 +2352,7 @@ void hugepage_add_new_anon_rmap(struct page *page, | ||||||
| { | { | ||||||
| 	BUG_ON(address < vma->vm_start || address >= vma->vm_end); | 	BUG_ON(address < vma->vm_start || address >= vma->vm_end); | ||||||
| 	atomic_set(compound_mapcount_ptr(page), 0); | 	atomic_set(compound_mapcount_ptr(page), 0); | ||||||
| 	if (hpage_pincount_available(page)) | 	atomic_set(compound_pincount_ptr(page), 0); | ||||||
| 		atomic_set(compound_pincount_ptr(page), 0); |  | ||||||
| 
 | 
 | ||||||
| 	__page_set_anon_rmap(page, vma, address, 1); | 	__page_set_anon_rmap(page, vma, address, 1); | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Matthew Wilcox (Oracle)
						Matthew Wilcox (Oracle)