mm, swap: simplify folio swap allocation
With slot cache gone, clean up the allocation helpers even more.
folio_alloc_swap() will be the only entry for allocation and for adding
the folio to the swap cache (except suspend), making it the opposite of
folio_free_swap().

Link: https://lkml.kernel.org/r/20250313165935.63303-8-ryncsn@gmail.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Barry Song <v-songbaohua@oppo.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
commit b487a2da35, parent 0ff67f990b
					 6 changed files with 96 additions and 125 deletions
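As a reading aid before the per-file diffs, here is a minimal sketch of the calling convention this patch establishes, modelled on the shmem_writepage() and shrink_folio_list() call sites changed below. It is illustrative only: example_swap_writeout() is a made-up name, the gfp flags are copied from the reclaim call site, and the snippet assumes it lives under mm/ so the internal swap.h declarations (swap_writepage() and friends) are visible.

	/*
	 * Illustrative sketch, not part of the patch: folio_alloc_swap() now
	 * allocates a swap entry *and* adds the locked folio to the swap
	 * cache; its counterpart is folio_free_swap().
	 */
	static int example_swap_writeout(struct folio *folio,
					 struct writeback_control *wbc)
	{
		if (folio_alloc_swap(folio, __GFP_HIGH | __GFP_NOWARN)) {
			/*
			 * No swap space, memcg charge failure, or swap cache
			 * insertion failure: keep the folio dirty (callers may
			 * also split a large folio and retry).
			 */
			folio_mark_dirty(folio);
			return -ENOMEM;
		}

		/* On success folio->swap holds the allocated entry. */
		return swap_writepage(&folio->page, wbc);
	}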
				
			
include/linux/swap.h

@@ -478,7 +478,7 @@ static inline long get_nr_swap_pages(void)
 }
 
 extern void si_swapinfo(struct sysinfo *);
-swp_entry_t folio_alloc_swap(struct folio *folio);
+int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask);
 bool folio_free_swap(struct folio *folio);
 void put_swap_folio(struct folio *folio, swp_entry_t entry);
 extern swp_entry_t get_swap_page_of_type(int);
@@ -586,11 +586,9 @@ static inline int swp_swapcount(swp_entry_t entry)
 	return 0;
 }
 
-static inline swp_entry_t folio_alloc_swap(struct folio *folio)
+static inline int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask)
 {
-	swp_entry_t entry;
-	entry.val = 0;
-	return entry;
+	return -EINVAL;
 }
 
 static inline bool folio_free_swap(struct folio *folio)

mm/shmem.c (21 lines changed)
@@ -1533,7 +1533,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	struct inode *inode = mapping->host;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
-	swp_entry_t swap;
 	pgoff_t index;
 	int nr_pages;
 	bool split = false;
@@ -1615,14 +1614,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		folio_mark_uptodate(folio);
 	}
 
-	swap = folio_alloc_swap(folio);
-	if (!swap.val) {
-		if (nr_pages > 1)
-			goto try_split;
-
-		goto redirty;
-	}
-
 	/*
 	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
 	 * if it's not already there.  Do it now before the folio is
@@ -1635,20 +1626,20 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	if (list_empty(&info->swaplist))
 		list_add(&info->swaplist, &shmem_swaplist);
 
-	if (add_to_swap_cache(folio, swap,
-			__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN,
-			NULL) == 0) {
+	if (!folio_alloc_swap(folio, __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN)) {
 		shmem_recalc_inode(inode, 0, nr_pages);
-		swap_shmem_alloc(swap, nr_pages);
-		shmem_delete_from_page_cache(folio, swp_to_radix_entry(swap));
+		swap_shmem_alloc(folio->swap, nr_pages);
+		shmem_delete_from_page_cache(folio, swp_to_radix_entry(folio->swap));
 
 		mutex_unlock(&shmem_swaplist_mutex);
 		BUG_ON(folio_mapped(folio));
 		return swap_writepage(&folio->page, wbc);
 	}
 
 	list_del_init(&info->swaplist);
 	mutex_unlock(&shmem_swaplist_mutex);
-	put_swap_folio(folio, swap);
+	if (nr_pages > 1)
+		goto try_split;
 redirty:
 	folio_mark_dirty(folio);
 	if (wbc->for_reclaim)

mm/swap.h

@@ -50,7 +50,6 @@ static inline pgoff_t swap_cache_index(swp_entry_t entry)
 }
 
 void show_swap_cache_info(void);
-bool add_to_swap(struct folio *folio);
 void *get_shadow_from_swap_cache(swp_entry_t entry);
 int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
 		      gfp_t gfp, void **shadowp);
@@ -163,11 +162,6 @@ struct folio *filemap_get_incore_folio(struct address_space *mapping,
 	return filemap_get_folio(mapping, index);
 }
 
-static inline bool add_to_swap(struct folio *folio)
-{
-	return false;
-}
-
 static inline void *get_shadow_from_swap_cache(swp_entry_t entry)
 {
 	return NULL;

mm/swap_state.c

@@ -166,63 +166,6 @@ void __delete_from_swap_cache(struct folio *folio,
 	__lruvec_stat_mod_folio(folio, NR_SWAPCACHE, -nr);
 }
 
-/**
- * add_to_swap - allocate swap space for a folio
- * @folio: folio we want to move to swap
- *
- * Allocate swap space for the folio and add the folio to the
- * swap cache.
- *
- * Context: Caller needs to hold the folio lock.
- * Return: Whether the folio was added to the swap cache.
- */
-bool add_to_swap(struct folio *folio)
-{
-	swp_entry_t entry;
-	int err;
-
-	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
-	VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio);
-
-	entry = folio_alloc_swap(folio);
-	if (!entry.val)
-		return false;
-
-	/*
-	 * XArray node allocations from PF_MEMALLOC contexts could
-	 * completely exhaust the page allocator. __GFP_NOMEMALLOC
-	 * stops emergency reserves from being allocated.
-	 *
-	 * TODO: this could cause a theoretical memory reclaim
-	 * deadlock in the swap out path.
-	 */
-	/*
-	 * Add it to the swap cache.
-	 */
-	err = add_to_swap_cache(folio, entry,
-			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL);
-	if (err)
-		goto fail;
-	/*
-	 * Normally the folio will be dirtied in unmap because its
-	 * pte should be dirty. A special case is MADV_FREE page. The
-	 * page's pte could have dirty bit cleared but the folio's
-	 * SwapBacked flag is still set because clearing the dirty bit
-	 * and SwapBacked flag has no lock protected. For such folio,
-	 * unmap will not set dirty bit for it, so folio reclaim will
-	 * not write the folio out. This can cause data corruption when
-	 * the folio is swapped in later. Always setting the dirty flag
-	 * for the folio solves the problem.
-	 */
-	folio_mark_dirty(folio);
-
-	return true;
-
-fail:
-	put_swap_folio(folio, entry);
-	return false;
-}
-
 /*
  * This must be called only on folios that have
  * been verified to be in the swap cache and locked.

mm/swapfile.c (113 lines changed)
@@ -1176,9 +1176,8 @@ static bool get_swap_device_info(struct swap_info_struct *si)
  * Fast path try to get swap entries with specified order from current
  * CPU's swap entry pool (a cluster).
  */
-static int swap_alloc_fast(swp_entry_t *entry,
-			   unsigned char usage,
-			   int order)
+static bool swap_alloc_fast(swp_entry_t *entry,
+			    int order)
 {
 	struct swap_cluster_info *ci;
 	struct swap_info_struct *si;
@@ -1197,7 +1196,7 @@ static int swap_alloc_fast(swp_entry_t *entry,
 	if (cluster_is_usable(ci, order)) {
 		if (cluster_is_empty(ci))
 			offset = cluster_offset(si, ci);
-		found = alloc_swap_scan_cluster(si, ci, offset, order, usage);
+		found = alloc_swap_scan_cluster(si, ci, offset, order, SWAP_HAS_CACHE);
 		if (found)
 			*entry = swp_entry(si->type, found);
 	} else {
@@ -1208,47 +1207,30 @@ static int swap_alloc_fast(swp_entry_t *entry,
 	return !!found;
 }
 
-swp_entry_t folio_alloc_swap(struct folio *folio)
+/* Rotate the device and switch to a new cluster */
+static bool swap_alloc_slow(swp_entry_t *entry,
+			    int order)
 {
-	unsigned int order = folio_order(folio);
-	unsigned int size = 1 << order;
-	struct swap_info_struct *si, *next;
-	swp_entry_t entry = {};
-	unsigned long offset;
 	int node;
+	unsigned long offset;
+	struct swap_info_struct *si, *next;
 
-	if (order) {
-		/*
-		 * Should not even be attempting large allocations when huge
-		 * page swap is disabled. Warn and fail the allocation.
-		 */
-		if (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER) {
-			VM_WARN_ON_ONCE(1);
-			return entry;
-		}
-	}
-
-	/* Fast path using percpu cluster */
-	local_lock(&percpu_swap_cluster.lock);
-	if (swap_alloc_fast(&entry, SWAP_HAS_CACHE, order))
-		goto out;
-
-	/* Rotate the device and switch to a new cluster */
+	node = numa_node_id();
 	spin_lock(&swap_avail_lock);
 start_over:
-	node = numa_node_id();
 	plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) {
 		/* Rotate the device and switch to a new cluster */
 		plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]);
 		spin_unlock(&swap_avail_lock);
 		if (get_swap_device_info(si)) {
 			offset = cluster_alloc_swap_entry(si, order, SWAP_HAS_CACHE);
 			put_swap_device(si);
 			if (offset) {
-				entry = swp_entry(si->type, offset);
-				goto out;
+				*entry = swp_entry(si->type, offset);
+				return true;
 			}
 			if (order)
-				goto out;
+				return false;
 		}
 
 		spin_lock(&swap_avail_lock);
@@ -1267,16 +1249,67 @@ swp_entry_t folio_alloc_swap(struct folio *folio)
 			goto start_over;
 	}
 	spin_unlock(&swap_avail_lock);
-out:
-	local_unlock(&percpu_swap_cluster.lock);
-	/* Need to call this even if allocation failed, for MEMCG_SWAP_FAIL. */
-	if (mem_cgroup_try_charge_swap(folio, entry)) {
-		put_swap_folio(folio, entry);
-		entry.val = 0;
+	return false;
+}
+
+/**
+ * folio_alloc_swap - allocate swap space for a folio
+ * @folio: folio we want to move to swap
+ * @gfp: gfp mask for shadow nodes
+ *
+ * Allocate swap space for the folio and add the folio to the
+ * swap cache.
+ *
+ * Context: Caller needs to hold the folio lock.
+ * Return: Whether the folio was added to the swap cache.
+ */
+int folio_alloc_swap(struct folio *folio, gfp_t gfp)
+{
+	unsigned int order = folio_order(folio);
+	unsigned int size = 1 << order;
+	swp_entry_t entry = {};
+
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+	VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio);
+
+	/*
+	 * Should not even be attempting large allocations when huge
+	 * page swap is disabled. Warn and fail the allocation.
+	 */
+	if (order && (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER)) {
+		VM_WARN_ON_ONCE(1);
+		return -EINVAL;
 	}
-	if (entry.val)
-		atomic_long_sub(size, &nr_swap_pages);
-	return entry;
+
+	local_lock(&percpu_swap_cluster.lock);
+	if (!swap_alloc_fast(&entry, order))
+		swap_alloc_slow(&entry, order);
+	local_unlock(&percpu_swap_cluster.lock);
+
+	/* Need to call this even if allocation failed, for MEMCG_SWAP_FAIL. */
+	if (mem_cgroup_try_charge_swap(folio, entry))
+		goto out_free;
+
+	if (!entry.val)
+		return -ENOMEM;
+
+	/*
+	 * XArray node allocations from PF_MEMALLOC contexts could
+	 * completely exhaust the page allocator. __GFP_NOMEMALLOC
+	 * stops emergency reserves from being allocated.
+	 *
+	 * TODO: this could cause a theoretical memory reclaim
+	 * deadlock in the swap out path.
+	 */
+	if (add_to_swap_cache(folio, entry, gfp | __GFP_NOMEMALLOC, NULL))
+		goto out_free;
+
+	atomic_long_sub(size, &nr_swap_pages);
+	return 0;
+
+out_free:
+	put_swap_folio(folio, entry);
+	return -ENOMEM;
 }
 
 static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
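As a reading aid, the return contract implemented by the folio_alloc_swap() hunk above can be summed up as follows (a reader's summary of the code, not text from the patch):

	/*
	 * Post-patch return contract of folio_alloc_swap() (summary of the
	 * hunk above, not part of the patch):
	 *
	 *   0        swap entry allocated, charged via mem_cgroup_try_charge_swap(),
	 *            folio added to the swap cache, nr_swap_pages decremented.
	 *   -EINVAL  order > 0 while CONFIG_THP_SWAP is disabled, or the folio
	 *            is larger than a swap cluster.
	 *   -ENOMEM  no entry available, the memcg swap charge failed, or
	 *            add_to_swap_cache() failed; any allocated entry is
	 *            released with put_swap_folio().
	 */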

mm/vmscan.c (16 lines changed)
@@ -1289,7 +1289,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 					    split_folio_to_list(folio, folio_list))
 						goto activate_locked;
 				}
-				if (!add_to_swap(folio)) {
+				if (folio_alloc_swap(folio, __GFP_HIGH | __GFP_NOWARN)) {
 					int __maybe_unused order = folio_order(folio);
 
 					if (!folio_test_large(folio))
@@ -1305,9 +1305,21 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 					}
 #endif
 					count_mthp_stat(order, MTHP_STAT_SWPOUT_FALLBACK);
-					if (!add_to_swap(folio))
+					if (folio_alloc_swap(folio, __GFP_HIGH | __GFP_NOWARN))
 						goto activate_locked_split;
 				}
+				/*
+				 * Normally the folio will be dirtied in unmap because its
+				 * pte should be dirty. A special case is MADV_FREE page. The
+				 * page's pte could have dirty bit cleared but the folio's
+				 * SwapBacked flag is still set because clearing the dirty bit
+				 * and SwapBacked flag has no lock protected. For such folio,
+				 * unmap will not set dirty bit for it, so folio reclaim will
+				 * not write the folio out. This can cause data corruption when
+				 * the folio is swapped in later. Always setting the dirty flag
+				 * for the folio solves the problem.
+				 */
+				folio_mark_dirty(folio);
 			}
 		}
 