mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	mm/gup: avoid an unnecessary allocation call for FOLL_LONGTERM cases
commit53ba78de06("mm/gup: introduce check_and_migrate_movable_folios()") created a new constraint on the pin_user_pages*() API family: a potentially large internal allocation must now occur, for FOLL_LONGTERM cases. A user-visible consequence has now appeared: user space can no longer pin more than 2GB of memory anymore on x86_64. That's because, on a 4KB PAGE_SIZE system, when user space tries to (indirectly, via a device driver that calls pin_user_pages()) pin 2GB, this requires an allocation of a folio pointers array of MAX_PAGE_ORDER size, which is the limit for kmalloc(). In addition to the directly visible effect described above, there is also the problem of adding an unnecessary allocation. The **pages array argument has already been allocated, and there is no need for a redundant **folios array allocation in this case. Fix this by avoiding the new allocation entirely. This is done by referring to either the original page[i] within **pages, or to the associated folio. Thanks to David Hildenbrand for suggesting this approach and for providing the initial implementation (which I've tested and adjusted slightly) as well. [jhubbard@nvidia.com: whitespace tweak, per David] Link: https://lkml.kernel.org/r/131cf9c8-ebc0-4cbb-b722-22fa8527bf3c@nvidia.com [jhubbard@nvidia.com: bypass pofs_get_folio(), per Oscar] Link: https://lkml.kernel.org/r/c1587c7f-9155-45be-bd62-1e36c0dd6923@nvidia.com Link: https://lkml.kernel.org/r/20241105032944.141488-2-jhubbard@nvidia.com Fixes:53ba78de06("mm/gup: introduce check_and_migrate_movable_folios()") Signed-off-by: John Hubbard <jhubbard@nvidia.com> Suggested-by: David Hildenbrand <david@redhat.com> Acked-by: David Hildenbrand <david@redhat.com> Reviewed-by: Oscar Salvador <osalvador@suse.de> Cc: Vivek Kasireddy <vivek.kasireddy@intel.com> Cc: Dave Airlie <airlied@redhat.com> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Christoph Hellwig <hch@infradead.org> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Peter Xu <peterx@redhat.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Hugh Dickins <hughd@google.com> Cc: Junxiao Chang <junxiao.chang@intel.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									247d720b2c
								
							
						
					
					
						commit
						94efde1d15
					
				
					 1 changed files with 77 additions and 39 deletions
				
			
		
							
								
								
									
										116
									
								
								mm/gup.c
									
									
									
									
									
								
							
							
						
						
									
										116
									
								
								mm/gup.c
									
									
									
									
									
								
							| 
						 | 
					@ -2273,20 +2273,57 @@ struct page *get_dump_page(unsigned long addr)
 | 
				
			||||||
#endif /* CONFIG_ELF_CORE */
 | 
					#endif /* CONFIG_ELF_CORE */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_MIGRATION
 | 
					#ifdef CONFIG_MIGRATION
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * An array of either pages or folios ("pofs"). Although it may seem tempting to
 | 
				
			||||||
 | 
					 * avoid this complication, by simply interpreting a list of folios as a list of
 | 
				
			||||||
 | 
					 * pages, that approach won't work in the longer term, because eventually the
 | 
				
			||||||
 | 
					 * layouts of struct page and struct folio will become completely different.
 | 
				
			||||||
 | 
					 * Furthermore, this pof approach avoids excessive page_folio() calls.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					struct pages_or_folios {
 | 
				
			||||||
 | 
						union {
 | 
				
			||||||
 | 
							struct page **pages;
 | 
				
			||||||
 | 
							struct folio **folios;
 | 
				
			||||||
 | 
							void **entries;
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
						bool has_folios;
 | 
				
			||||||
 | 
						long nr_entries;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static struct folio *pofs_get_folio(struct pages_or_folios *pofs, long i)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (pofs->has_folios)
 | 
				
			||||||
 | 
							return pofs->folios[i];
 | 
				
			||||||
 | 
						return page_folio(pofs->pages[i]);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void pofs_clear_entry(struct pages_or_folios *pofs, long i)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						pofs->entries[i] = NULL;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void pofs_unpin(struct pages_or_folios *pofs)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (pofs->has_folios)
 | 
				
			||||||
 | 
							unpin_folios(pofs->folios, pofs->nr_entries);
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							unpin_user_pages(pofs->pages, pofs->nr_entries);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Returns the number of collected folios. Return value is always >= 0.
 | 
					 * Returns the number of collected folios. Return value is always >= 0.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static unsigned long collect_longterm_unpinnable_folios(
 | 
					static unsigned long collect_longterm_unpinnable_folios(
 | 
				
			||||||
					struct list_head *movable_folio_list,
 | 
							struct list_head *movable_folio_list,
 | 
				
			||||||
					unsigned long nr_folios,
 | 
							struct pages_or_folios *pofs)
 | 
				
			||||||
					struct folio **folios)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long i, collected = 0;
 | 
						unsigned long i, collected = 0;
 | 
				
			||||||
	struct folio *prev_folio = NULL;
 | 
						struct folio *prev_folio = NULL;
 | 
				
			||||||
	bool drain_allow = true;
 | 
						bool drain_allow = true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for (i = 0; i < nr_folios; i++) {
 | 
						for (i = 0; i < pofs->nr_entries; i++) {
 | 
				
			||||||
		struct folio *folio = folios[i];
 | 
							struct folio *folio = pofs_get_folio(pofs, i);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (folio == prev_folio)
 | 
							if (folio == prev_folio)
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
| 
						 | 
					@ -2327,16 +2364,15 @@ static unsigned long collect_longterm_unpinnable_folios(
 | 
				
			||||||
 * Returns -EAGAIN if all folios were successfully migrated or -errno for
 | 
					 * Returns -EAGAIN if all folios were successfully migrated or -errno for
 | 
				
			||||||
 * failure (or partial success).
 | 
					 * failure (or partial success).
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static int migrate_longterm_unpinnable_folios(
 | 
					static int
 | 
				
			||||||
					struct list_head *movable_folio_list,
 | 
					migrate_longterm_unpinnable_folios(struct list_head *movable_folio_list,
 | 
				
			||||||
					unsigned long nr_folios,
 | 
									   struct pages_or_folios *pofs)
 | 
				
			||||||
					struct folio **folios)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int ret;
 | 
						int ret;
 | 
				
			||||||
	unsigned long i;
 | 
						unsigned long i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for (i = 0; i < nr_folios; i++) {
 | 
						for (i = 0; i < pofs->nr_entries; i++) {
 | 
				
			||||||
		struct folio *folio = folios[i];
 | 
							struct folio *folio = pofs_get_folio(pofs, i);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (folio_is_device_coherent(folio)) {
 | 
							if (folio_is_device_coherent(folio)) {
 | 
				
			||||||
			/*
 | 
								/*
 | 
				
			||||||
| 
						 | 
					@ -2344,7 +2380,7 @@ static int migrate_longterm_unpinnable_folios(
 | 
				
			||||||
			 * convert the pin on the source folio to a normal
 | 
								 * convert the pin on the source folio to a normal
 | 
				
			||||||
			 * reference.
 | 
								 * reference.
 | 
				
			||||||
			 */
 | 
								 */
 | 
				
			||||||
			folios[i] = NULL;
 | 
								pofs_clear_entry(pofs, i);
 | 
				
			||||||
			folio_get(folio);
 | 
								folio_get(folio);
 | 
				
			||||||
			gup_put_folio(folio, 1, FOLL_PIN);
 | 
								gup_put_folio(folio, 1, FOLL_PIN);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2363,8 +2399,8 @@ static int migrate_longterm_unpinnable_folios(
 | 
				
			||||||
		 * calling folio_isolate_lru() which takes a reference so the
 | 
							 * calling folio_isolate_lru() which takes a reference so the
 | 
				
			||||||
		 * folio won't be freed if it's migrating.
 | 
							 * folio won't be freed if it's migrating.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		unpin_folio(folios[i]);
 | 
							unpin_folio(folio);
 | 
				
			||||||
		folios[i] = NULL;
 | 
							pofs_clear_entry(pofs, i);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!list_empty(movable_folio_list)) {
 | 
						if (!list_empty(movable_folio_list)) {
 | 
				
			||||||
| 
						 | 
					@ -2387,12 +2423,26 @@ static int migrate_longterm_unpinnable_folios(
 | 
				
			||||||
	return -EAGAIN;
 | 
						return -EAGAIN;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
err:
 | 
					err:
 | 
				
			||||||
	unpin_folios(folios, nr_folios);
 | 
						pofs_unpin(pofs);
 | 
				
			||||||
	putback_movable_pages(movable_folio_list);
 | 
						putback_movable_pages(movable_folio_list);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return ret;
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static long
 | 
				
			||||||
 | 
					check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						LIST_HEAD(movable_folio_list);
 | 
				
			||||||
 | 
						unsigned long collected;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						collected = collect_longterm_unpinnable_folios(&movable_folio_list,
 | 
				
			||||||
 | 
											       pofs);
 | 
				
			||||||
 | 
						if (!collected)
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Check whether all folios are *allowed* to be pinned indefinitely (long term).
 | 
					 * Check whether all folios are *allowed* to be pinned indefinitely (long term).
 | 
				
			||||||
 * Rather confusingly, all folios in the range are required to be pinned via
 | 
					 * Rather confusingly, all folios in the range are required to be pinned via
 | 
				
			||||||
| 
						 | 
					@ -2417,16 +2467,13 @@ static int migrate_longterm_unpinnable_folios(
 | 
				
			||||||
static long check_and_migrate_movable_folios(unsigned long nr_folios,
 | 
					static long check_and_migrate_movable_folios(unsigned long nr_folios,
 | 
				
			||||||
					     struct folio **folios)
 | 
										     struct folio **folios)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long collected;
 | 
						struct pages_or_folios pofs = {
 | 
				
			||||||
	LIST_HEAD(movable_folio_list);
 | 
							.folios = folios,
 | 
				
			||||||
 | 
							.has_folios = true,
 | 
				
			||||||
 | 
							.nr_entries = nr_folios,
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	collected = collect_longterm_unpinnable_folios(&movable_folio_list,
 | 
						return check_and_migrate_movable_pages_or_folios(&pofs);
 | 
				
			||||||
						       nr_folios, folios);
 | 
					 | 
				
			||||||
	if (!collected)
 | 
					 | 
				
			||||||
		return 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return migrate_longterm_unpinnable_folios(&movable_folio_list,
 | 
					 | 
				
			||||||
						  nr_folios, folios);
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					@ -2436,22 +2483,13 @@ static long check_and_migrate_movable_folios(unsigned long nr_folios,
 | 
				
			||||||
static long check_and_migrate_movable_pages(unsigned long nr_pages,
 | 
					static long check_and_migrate_movable_pages(unsigned long nr_pages,
 | 
				
			||||||
					    struct page **pages)
 | 
										    struct page **pages)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct folio **folios;
 | 
						struct pages_or_folios pofs = {
 | 
				
			||||||
	long i, ret;
 | 
							.pages = pages,
 | 
				
			||||||
 | 
							.has_folios = false,
 | 
				
			||||||
 | 
							.nr_entries = nr_pages,
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	folios = kmalloc_array(nr_pages, sizeof(*folios), GFP_KERNEL);
 | 
						return check_and_migrate_movable_pages_or_folios(&pofs);
 | 
				
			||||||
	if (!folios) {
 | 
					 | 
				
			||||||
		unpin_user_pages(pages, nr_pages);
 | 
					 | 
				
			||||||
		return -ENOMEM;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	for (i = 0; i < nr_pages; i++)
 | 
					 | 
				
			||||||
		folios[i] = page_folio(pages[i]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	ret = check_and_migrate_movable_folios(nr_pages, folios);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	kfree(folios);
 | 
					 | 
				
			||||||
	return ret;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
static long check_and_migrate_movable_pages(unsigned long nr_pages,
 | 
					static long check_and_migrate_movable_pages(unsigned long nr_pages,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue