mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	mm: swap: allow cache reclaim to skip slot cache
Currently we free the reclaimed slots through the slot cache even if the slot is required to be empty immediately. As a result, the reclaim caller will see the slot still occupied even after a successful reclaim, and needs to keep reclaiming until the slot cache gets flushed. This causes ineffective or excessive reclaim when swap is under stress. So introduce a new flag allowing the slot to be emptied while bypassing the slot cache. [21cnbao@gmail.com: small folios should have nr_pages == 1 but not nr_page == 0] Link: https://lkml.kernel.org/r/20240805015324.45134-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20240730-swap-allocator-v5-6-cb9c148b9297@kernel.org Signed-off-by: Kairui Song <kasong@tencent.com> Reported-by: Barry Song <21cnbao@gmail.com> Cc: Chris Li <chrisl@kernel.org> Cc: "Huang, Ying" <ying.huang@intel.com> Cc: Hugh Dickins <hughd@google.com> Cc: Kalesh Singh <kaleshsingh@google.com> Cc: Ryan Roberts <ryan.roberts@arm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									650975d2b1
								
							
						
					
					
						commit
						862590ac37
					
				
					 1 changed files with 109 additions and 43 deletions
				
			
		
							
								
								
									
										152
									
								
								mm/swapfile.c
									
									
									
									
									
								
							
							
						
						
									
										152
									
								
								mm/swapfile.c
									
									
									
									
									
								
							| 
						 | 
					@ -53,8 +53,15 @@
 | 
				
			||||||
static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
 | 
					static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
 | 
				
			||||||
				 unsigned char);
 | 
									 unsigned char);
 | 
				
			||||||
static void free_swap_count_continuations(struct swap_info_struct *);
 | 
					static void free_swap_count_continuations(struct swap_info_struct *);
 | 
				
			||||||
 | 
					static void swap_entry_range_free(struct swap_info_struct *si, swp_entry_t entry,
 | 
				
			||||||
 | 
									  unsigned int nr_pages);
 | 
				
			||||||
static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
 | 
					static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
 | 
				
			||||||
			     unsigned int nr_entries);
 | 
								     unsigned int nr_entries);
 | 
				
			||||||
 | 
					static bool folio_swapcache_freeable(struct folio *folio);
 | 
				
			||||||
 | 
					static struct swap_cluster_info *lock_cluster_or_swap_info(
 | 
				
			||||||
 | 
							struct swap_info_struct *si, unsigned long offset);
 | 
				
			||||||
 | 
					static void unlock_cluster_or_swap_info(struct swap_info_struct *si,
 | 
				
			||||||
 | 
										struct swap_cluster_info *ci);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static DEFINE_SPINLOCK(swap_lock);
 | 
					static DEFINE_SPINLOCK(swap_lock);
 | 
				
			||||||
static unsigned int nr_swapfiles;
 | 
					static unsigned int nr_swapfiles;
 | 
				
			||||||
| 
						 | 
					@ -129,8 +136,25 @@ static inline unsigned char swap_count(unsigned char ent)
 | 
				
			||||||
 * corresponding page
 | 
					 * corresponding page
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#define TTRS_UNMAPPED		0x2
 | 
					#define TTRS_UNMAPPED		0x2
 | 
				
			||||||
/* Reclaim the swap entry if swap is getting full*/
 | 
					/* Reclaim the swap entry if swap is getting full */
 | 
				
			||||||
#define TTRS_FULL		0x4
 | 
					#define TTRS_FULL		0x4
 | 
				
			||||||
 | 
					/* Reclaim directly, bypass the slot cache and don't touch device lock */
 | 
				
			||||||
 | 
					#define TTRS_DIRECT		0x8
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static bool swap_is_has_cache(struct swap_info_struct *si,
 | 
				
			||||||
 | 
								      unsigned long offset, int nr_pages)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned char *map = si->swap_map + offset;
 | 
				
			||||||
 | 
						unsigned char *map_end = map + nr_pages;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						do {
 | 
				
			||||||
 | 
							VM_BUG_ON(!(*map & SWAP_HAS_CACHE));
 | 
				
			||||||
 | 
							if (*map != SWAP_HAS_CACHE)
 | 
				
			||||||
 | 
								return false;
 | 
				
			||||||
 | 
						} while (++map < map_end);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return true;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * returns number of pages in the folio that backs the swap entry. If positive,
 | 
					 * returns number of pages in the folio that backs the swap entry. If positive,
 | 
				
			||||||
| 
						 | 
					@ -141,12 +165,22 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
 | 
				
			||||||
				 unsigned long offset, unsigned long flags)
 | 
									 unsigned long offset, unsigned long flags)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	swp_entry_t entry = swp_entry(si->type, offset);
 | 
						swp_entry_t entry = swp_entry(si->type, offset);
 | 
				
			||||||
 | 
						struct address_space *address_space = swap_address_space(entry);
 | 
				
			||||||
 | 
						struct swap_cluster_info *ci;
 | 
				
			||||||
	struct folio *folio;
 | 
						struct folio *folio;
 | 
				
			||||||
	int ret = 0;
 | 
						int ret, nr_pages;
 | 
				
			||||||
 | 
						bool need_reclaim;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	folio = filemap_get_folio(swap_address_space(entry), swap_cache_index(entry));
 | 
						folio = filemap_get_folio(address_space, swap_cache_index(entry));
 | 
				
			||||||
	if (IS_ERR(folio))
 | 
						if (IS_ERR(folio))
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* offset could point to the middle of a large folio */
 | 
				
			||||||
 | 
						entry = folio->swap;
 | 
				
			||||||
 | 
						offset = swp_offset(entry);
 | 
				
			||||||
 | 
						nr_pages = folio_nr_pages(folio);
 | 
				
			||||||
 | 
						ret = -nr_pages;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * When this function is called from scan_swap_map_slots() and it's
 | 
						 * When this function is called from scan_swap_map_slots() and it's
 | 
				
			||||||
	 * called by vmscan.c at reclaiming folios. So we hold a folio lock
 | 
						 * called by vmscan.c at reclaiming folios. So we hold a folio lock
 | 
				
			||||||
| 
						 | 
					@ -154,14 +188,50 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
 | 
				
			||||||
	 * case and you should use folio_free_swap() with explicit folio_lock()
 | 
						 * case and you should use folio_free_swap() with explicit folio_lock()
 | 
				
			||||||
	 * in usual operations.
 | 
						 * in usual operations.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (folio_trylock(folio)) {
 | 
						if (!folio_trylock(folio))
 | 
				
			||||||
		if ((flags & TTRS_ANYWAY) ||
 | 
							goto out;
 | 
				
			||||||
		    ((flags & TTRS_UNMAPPED) && !folio_mapped(folio)) ||
 | 
					
 | 
				
			||||||
		    ((flags & TTRS_FULL) && mem_cgroup_swap_full(folio)))
 | 
						need_reclaim = ((flags & TTRS_ANYWAY) ||
 | 
				
			||||||
			ret = folio_free_swap(folio);
 | 
								((flags & TTRS_UNMAPPED) && !folio_mapped(folio)) ||
 | 
				
			||||||
		folio_unlock(folio);
 | 
								((flags & TTRS_FULL) && mem_cgroup_swap_full(folio)));
 | 
				
			||||||
 | 
						if (!need_reclaim || !folio_swapcache_freeable(folio))
 | 
				
			||||||
 | 
							goto out_unlock;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * It's safe to delete the folio from swap cache only if the folio's
 | 
				
			||||||
 | 
						 * swap_map is HAS_CACHE only, which means the slots have no page table
 | 
				
			||||||
 | 
						 * reference or pending writeback, and can't be allocated to others.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						ci = lock_cluster_or_swap_info(si, offset);
 | 
				
			||||||
 | 
						need_reclaim = swap_is_has_cache(si, offset, nr_pages);
 | 
				
			||||||
 | 
						unlock_cluster_or_swap_info(si, ci);
 | 
				
			||||||
 | 
						if (!need_reclaim)
 | 
				
			||||||
 | 
							goto out_unlock;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!(flags & TTRS_DIRECT)) {
 | 
				
			||||||
 | 
							/* Free through slot cache */
 | 
				
			||||||
 | 
							delete_from_swap_cache(folio);
 | 
				
			||||||
 | 
							folio_set_dirty(folio);
 | 
				
			||||||
 | 
							ret = nr_pages;
 | 
				
			||||||
 | 
							goto out_unlock;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	ret = ret ? folio_nr_pages(folio) : -folio_nr_pages(folio);
 | 
					
 | 
				
			||||||
 | 
						xa_lock_irq(&address_space->i_pages);
 | 
				
			||||||
 | 
						__delete_from_swap_cache(folio, entry, NULL);
 | 
				
			||||||
 | 
						xa_unlock_irq(&address_space->i_pages);
 | 
				
			||||||
 | 
						folio_ref_sub(folio, nr_pages);
 | 
				
			||||||
 | 
						folio_set_dirty(folio);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock(&si->lock);
 | 
				
			||||||
 | 
						/* Only single page folio can be backed by zswap */
 | 
				
			||||||
 | 
						if (nr_pages == 1)
 | 
				
			||||||
 | 
							zswap_invalidate(entry);
 | 
				
			||||||
 | 
						swap_entry_range_free(si, entry, nr_pages);
 | 
				
			||||||
 | 
						spin_unlock(&si->lock);
 | 
				
			||||||
 | 
						ret = nr_pages;
 | 
				
			||||||
 | 
					out_unlock:
 | 
				
			||||||
 | 
						folio_unlock(folio);
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
	folio_put(folio);
 | 
						folio_put(folio);
 | 
				
			||||||
	return ret;
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -895,7 +965,7 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
 | 
				
			||||||
	if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
 | 
						if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
 | 
				
			||||||
		int swap_was_freed;
 | 
							int swap_was_freed;
 | 
				
			||||||
		spin_unlock(&si->lock);
 | 
							spin_unlock(&si->lock);
 | 
				
			||||||
		swap_was_freed = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY);
 | 
							swap_was_freed = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY | TTRS_DIRECT);
 | 
				
			||||||
		spin_lock(&si->lock);
 | 
							spin_lock(&si->lock);
 | 
				
			||||||
		/* entry was freed successfully, try to use this again */
 | 
							/* entry was freed successfully, try to use this again */
 | 
				
			||||||
		if (swap_was_freed > 0)
 | 
							if (swap_was_freed > 0)
 | 
				
			||||||
| 
						 | 
					@ -1333,9 +1403,6 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
 | 
				
			||||||
	unsigned long offset = swp_offset(entry);
 | 
						unsigned long offset = swp_offset(entry);
 | 
				
			||||||
	struct swap_cluster_info *ci;
 | 
						struct swap_cluster_info *ci;
 | 
				
			||||||
	struct swap_info_struct *si;
 | 
						struct swap_info_struct *si;
 | 
				
			||||||
	unsigned char *map;
 | 
					 | 
				
			||||||
	unsigned int i, free_entries = 0;
 | 
					 | 
				
			||||||
	unsigned char val;
 | 
					 | 
				
			||||||
	int size = 1 << swap_entry_order(folio_order(folio));
 | 
						int size = 1 << swap_entry_order(folio_order(folio));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	si = _swap_info_get(entry);
 | 
						si = _swap_info_get(entry);
 | 
				
			||||||
| 
						 | 
					@ -1343,23 +1410,14 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ci = lock_cluster_or_swap_info(si, offset);
 | 
						ci = lock_cluster_or_swap_info(si, offset);
 | 
				
			||||||
	if (size > 1) {
 | 
						if (size > 1 && swap_is_has_cache(si, offset, size)) {
 | 
				
			||||||
		map = si->swap_map + offset;
 | 
							unlock_cluster_or_swap_info(si, ci);
 | 
				
			||||||
		for (i = 0; i < size; i++) {
 | 
							spin_lock(&si->lock);
 | 
				
			||||||
			val = map[i];
 | 
							swap_entry_range_free(si, entry, size);
 | 
				
			||||||
			VM_BUG_ON(!(val & SWAP_HAS_CACHE));
 | 
							spin_unlock(&si->lock);
 | 
				
			||||||
			if (val == SWAP_HAS_CACHE)
 | 
							return;
 | 
				
			||||||
				free_entries++;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		if (free_entries == size) {
 | 
					 | 
				
			||||||
			unlock_cluster_or_swap_info(si, ci);
 | 
					 | 
				
			||||||
			spin_lock(&si->lock);
 | 
					 | 
				
			||||||
			swap_entry_range_free(si, entry, size);
 | 
					 | 
				
			||||||
			spin_unlock(&si->lock);
 | 
					 | 
				
			||||||
			return;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	for (i = 0; i < size; i++, entry.val++) {
 | 
						for (int i = 0; i < size; i++, entry.val++) {
 | 
				
			||||||
		if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) {
 | 
							if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) {
 | 
				
			||||||
			unlock_cluster_or_swap_info(si, ci);
 | 
								unlock_cluster_or_swap_info(si, ci);
 | 
				
			||||||
			free_swap_slot(entry);
 | 
								free_swap_slot(entry);
 | 
				
			||||||
| 
						 | 
					@ -1519,16 +1577,7 @@ static bool folio_swapped(struct folio *folio)
 | 
				
			||||||
	return swap_page_trans_huge_swapped(si, entry, folio_order(folio));
 | 
						return swap_page_trans_huge_swapped(si, entry, folio_order(folio));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					static bool folio_swapcache_freeable(struct folio *folio)
 | 
				
			||||||
 * folio_free_swap() - Free the swap space used for this folio.
 | 
					 | 
				
			||||||
 * @folio: The folio to remove.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * If swap is getting full, or if there are no more mappings of this folio,
 | 
					 | 
				
			||||||
 * then call folio_free_swap to free its swap space.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * Return: true if we were able to release the swap space.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
bool folio_free_swap(struct folio *folio)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
 | 
						VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1536,8 +1585,6 @@ bool folio_free_swap(struct folio *folio)
 | 
				
			||||||
		return false;
 | 
							return false;
 | 
				
			||||||
	if (folio_test_writeback(folio))
 | 
						if (folio_test_writeback(folio))
 | 
				
			||||||
		return false;
 | 
							return false;
 | 
				
			||||||
	if (folio_swapped(folio))
 | 
					 | 
				
			||||||
		return false;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Once hibernation has begun to create its image of memory,
 | 
						 * Once hibernation has begun to create its image of memory,
 | 
				
			||||||
| 
						 | 
					@ -1557,6 +1604,25 @@ bool folio_free_swap(struct folio *folio)
 | 
				
			||||||
	if (pm_suspended_storage())
 | 
						if (pm_suspended_storage())
 | 
				
			||||||
		return false;
 | 
							return false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return true;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
 | 
				
			||||||
 | 
					 * folio_free_swap() - Free the swap space used for this folio.
 | 
				
			||||||
 | 
					 * @folio: The folio to remove.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * If swap is getting full, or if there are no more mappings of this folio,
 | 
				
			||||||
 | 
					 * then call folio_free_swap to free its swap space.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Return: true if we were able to release the swap space.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					bool folio_free_swap(struct folio *folio)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (!folio_swapcache_freeable(folio))
 | 
				
			||||||
 | 
							return false;
 | 
				
			||||||
 | 
						if (folio_swapped(folio))
 | 
				
			||||||
 | 
							return false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	delete_from_swap_cache(folio);
 | 
						delete_from_swap_cache(folio);
 | 
				
			||||||
	folio_set_dirty(folio);
 | 
						folio_set_dirty(folio);
 | 
				
			||||||
	return true;
 | 
						return true;
 | 
				
			||||||
| 
						 | 
					@ -1633,7 +1699,7 @@ void free_swap_and_cache_nr(swp_entry_t entry, int nr)
 | 
				
			||||||
			 * to the next boundary.
 | 
								 * to the next boundary.
 | 
				
			||||||
			 */
 | 
								 */
 | 
				
			||||||
			nr = __try_to_reclaim_swap(si, offset,
 | 
								nr = __try_to_reclaim_swap(si, offset,
 | 
				
			||||||
					      TTRS_UNMAPPED | TTRS_FULL);
 | 
											   TTRS_UNMAPPED | TTRS_FULL);
 | 
				
			||||||
			if (nr == 0)
 | 
								if (nr == 0)
 | 
				
			||||||
				nr = 1;
 | 
									nr = 1;
 | 
				
			||||||
			else if (nr < 0)
 | 
								else if (nr < 0)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue