forked from mirrors/linux
		
	mm: swap: clean up swap readahead
Looking at the recent changes to swap readahead, I am very unhappy with the current code structure, which branches between two swap readahead algorithms inside do_swap_page. This patch cleans that up. The main motivation is that the fault handler does not need to be aware of the readahead algorithms; it should just call swapin_readahead. As a first step, this patch cleans things up a little but is not perfect (it is split out only to make review easier), so the next patch will complete the goal. [minchan@kernel.org: do not check readahead flag with THP anon] Link: http://lkml.kernel.org/r/874lm83zho.fsf@yhuang-dev.intel.com Link: http://lkml.kernel.org/r/20180227232611.169883-1-minchan@kernel.org Link: http://lkml.kernel.org/r/1509520520-32367-2-git-send-email-minchan@kernel.org Link: http://lkml.kernel.org/r/20180220085249.151400-2-minchan@kernel.org Signed-off-by: Minchan Kim <minchan@kernel.org> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Cc: Hugh Dickins <hughd@google.com> Cc: Huang Ying <ying.huang@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									e830c63a62
								
							
						
					
					
						commit
						eaf649ebc3
					
				
					 3 changed files with 62 additions and 77 deletions
				
			
		| 
						 | 
				
			
			@ -424,12 +424,8 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
 | 
			
		|||
			bool *new_page_allocated);
 | 
			
		||||
extern struct page *swapin_readahead(swp_entry_t, gfp_t,
 | 
			
		||||
			struct vm_area_struct *vma, unsigned long addr);
 | 
			
		||||
 | 
			
		||||
extern struct page *swap_readahead_detect(struct vm_fault *vmf,
 | 
			
		||||
					  struct vma_swap_readahead *swap_ra);
 | 
			
		||||
extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 | 
			
		||||
					   struct vm_fault *vmf,
 | 
			
		||||
					   struct vma_swap_readahead *swap_ra);
 | 
			
		||||
					   struct vm_fault *vmf);
 | 
			
		||||
 | 
			
		||||
/* linux/mm/swapfile.c */
 | 
			
		||||
extern atomic_long_t nr_swap_pages;
 | 
			
		||||
| 
						 | 
				
			
			@ -548,15 +544,8 @@ static inline bool swap_use_vma_readahead(void)
 | 
			
		|||
	return false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline struct page *swap_readahead_detect(
 | 
			
		||||
	struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
 | 
			
		||||
{
 | 
			
		||||
	return NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline struct page *do_swap_page_readahead(
 | 
			
		||||
	swp_entry_t fentry, gfp_t gfp_mask,
 | 
			
		||||
	struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
 | 
			
		||||
static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
 | 
			
		||||
				gfp_t gfp_mask, struct vm_fault *vmf)
 | 
			
		||||
{
 | 
			
		||||
	return NULL;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										24
									
								
								mm/memory.c
									
									
									
									
									
								
							
							
						
						
									
										24
									
								
								mm/memory.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -2883,26 +2883,16 @@ EXPORT_SYMBOL(unmap_mapping_range);
 | 
			
		|||
int do_swap_page(struct vm_fault *vmf)
 | 
			
		||||
{
 | 
			
		||||
	struct vm_area_struct *vma = vmf->vma;
 | 
			
		||||
	struct page *page = NULL, *swapcache = NULL;
 | 
			
		||||
	struct page *page = NULL, *swapcache;
 | 
			
		||||
	struct mem_cgroup *memcg;
 | 
			
		||||
	struct vma_swap_readahead swap_ra;
 | 
			
		||||
	swp_entry_t entry;
 | 
			
		||||
	pte_t pte;
 | 
			
		||||
	int locked;
 | 
			
		||||
	int exclusive = 0;
 | 
			
		||||
	int ret = 0;
 | 
			
		||||
	bool vma_readahead = swap_use_vma_readahead();
 | 
			
		||||
 | 
			
		||||
	if (vma_readahead) {
 | 
			
		||||
		page = swap_readahead_detect(vmf, &swap_ra);
 | 
			
		||||
		swapcache = page;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
 | 
			
		||||
		if (page)
 | 
			
		||||
			put_page(page);
 | 
			
		||||
	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
 | 
			
		||||
		goto out;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	entry = pte_to_swp_entry(vmf->orig_pte);
 | 
			
		||||
	if (unlikely(non_swap_entry(entry))) {
 | 
			
		||||
| 
						 | 
				
			
			@ -2928,11 +2918,8 @@ int do_swap_page(struct vm_fault *vmf)
 | 
			
		|||
 | 
			
		||||
 | 
			
		||||
	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
 | 
			
		||||
	if (!page) {
 | 
			
		||||
		page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
 | 
			
		||||
					 vmf->address);
 | 
			
		||||
	page = lookup_swap_cache(entry, vma, vmf->address);
 | 
			
		||||
	swapcache = page;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (!page) {
 | 
			
		||||
		struct swap_info_struct *si = swp_swap_info(entry);
 | 
			
		||||
| 
						 | 
				
			
			@ -2949,9 +2936,9 @@ int do_swap_page(struct vm_fault *vmf)
 | 
			
		|||
				swap_readpage(page, true);
 | 
			
		||||
			}
 | 
			
		||||
		} else {
 | 
			
		||||
			if (vma_readahead)
 | 
			
		||||
			if (swap_use_vma_readahead())
 | 
			
		||||
				page = do_swap_page_readahead(entry,
 | 
			
		||||
					GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
 | 
			
		||||
					GFP_HIGHUSER_MOVABLE, vmf);
 | 
			
		||||
			else
 | 
			
		||||
				page = swapin_readahead(entry,
 | 
			
		||||
				       GFP_HIGHUSER_MOVABLE, vma, vmf->address);
 | 
			
		||||
| 
						 | 
				
			
			@ -2982,7 +2969,6 @@ int do_swap_page(struct vm_fault *vmf)
 | 
			
		|||
		 */
 | 
			
		||||
		ret = VM_FAULT_HWPOISON;
 | 
			
		||||
		delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 | 
			
		||||
		swapcache = page;
 | 
			
		||||
		goto out_release;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -332,32 +332,43 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
 | 
			
		|||
			       unsigned long addr)
 | 
			
		||||
{
 | 
			
		||||
	struct page *page;
 | 
			
		||||
	unsigned long ra_info;
 | 
			
		||||
	int win, hits, readahead;
 | 
			
		||||
 | 
			
		||||
	page = find_get_page(swap_address_space(entry), swp_offset(entry));
 | 
			
		||||
 | 
			
		||||
	INC_CACHE_INFO(find_total);
 | 
			
		||||
	if (page) {
 | 
			
		||||
		bool vma_ra = swap_use_vma_readahead();
 | 
			
		||||
		bool readahead;
 | 
			
		||||
 | 
			
		||||
		INC_CACHE_INFO(find_success);
 | 
			
		||||
		/*
 | 
			
		||||
		 * At the moment, we don't support PG_readahead for anon THP
 | 
			
		||||
		 * so let's bail out rather than confusing the readahead stat.
 | 
			
		||||
		 */
 | 
			
		||||
		if (unlikely(PageTransCompound(page)))
 | 
			
		||||
			return page;
 | 
			
		||||
 | 
			
		||||
		readahead = TestClearPageReadahead(page);
 | 
			
		||||
		if (vma) {
 | 
			
		||||
			ra_info = GET_SWAP_RA_VAL(vma);
 | 
			
		||||
			win = SWAP_RA_WIN(ra_info);
 | 
			
		||||
			hits = SWAP_RA_HITS(ra_info);
 | 
			
		||||
		if (vma && vma_ra) {
 | 
			
		||||
			unsigned long ra_val;
 | 
			
		||||
			int win, hits;
 | 
			
		||||
 | 
			
		||||
			ra_val = GET_SWAP_RA_VAL(vma);
 | 
			
		||||
			win = SWAP_RA_WIN(ra_val);
 | 
			
		||||
			hits = SWAP_RA_HITS(ra_val);
 | 
			
		||||
			if (readahead)
 | 
			
		||||
				hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
 | 
			
		||||
			atomic_long_set(&vma->swap_readahead_info,
 | 
			
		||||
					SWAP_RA_VAL(addr, win, hits));
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (readahead) {
 | 
			
		||||
			count_vm_event(SWAP_RA_HIT);
 | 
			
		||||
			if (!vma)
 | 
			
		||||
			if (!vma || !vma_ra)
 | 
			
		||||
				atomic_inc(&swapin_readahead_hits);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return page;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -586,8 +597,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 | 
			
		|||
			continue;
 | 
			
		||||
		if (page_allocated) {
 | 
			
		||||
			swap_readpage(page, false);
 | 
			
		||||
			if (offset != entry_offset &&
 | 
			
		||||
			    likely(!PageTransCompound(page))) {
 | 
			
		||||
			if (offset != entry_offset) {
 | 
			
		||||
				SetPageReadahead(page);
 | 
			
		||||
				count_vm_event(SWAP_RA);
 | 
			
		||||
			}
 | 
			
		||||
| 
						 | 
				
			
			@ -649,16 +659,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
 | 
			
		|||
		    PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct page *swap_readahead_detect(struct vm_fault *vmf,
 | 
			
		||||
				   struct vma_swap_readahead *swap_ra)
 | 
			
		||||
static void swap_ra_info(struct vm_fault *vmf,
 | 
			
		||||
			struct vma_swap_readahead *ra_info)
 | 
			
		||||
{
 | 
			
		||||
	struct vm_area_struct *vma = vmf->vma;
 | 
			
		||||
	unsigned long swap_ra_info;
 | 
			
		||||
	struct page *page;
 | 
			
		||||
	unsigned long ra_val;
 | 
			
		||||
	swp_entry_t entry;
 | 
			
		||||
	unsigned long faddr, pfn, fpfn;
 | 
			
		||||
	unsigned long start, end;
 | 
			
		||||
	pte_t *pte;
 | 
			
		||||
	pte_t *pte, *orig_pte;
 | 
			
		||||
	unsigned int max_win, hits, prev_win, win, left;
 | 
			
		||||
#ifndef CONFIG_64BIT
 | 
			
		||||
	pte_t *tpte;
 | 
			
		||||
| 
						 | 
				
			
			@ -667,30 +676,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 | 
			
		|||
	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
 | 
			
		||||
			     SWAP_RA_ORDER_CEILING);
 | 
			
		||||
	if (max_win == 1) {
 | 
			
		||||
		swap_ra->win = 1;
 | 
			
		||||
		return NULL;
 | 
			
		||||
		ra_info->win = 1;
 | 
			
		||||
		return;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	faddr = vmf->address;
 | 
			
		||||
	entry = pte_to_swp_entry(vmf->orig_pte);
 | 
			
		||||
	if ((unlikely(non_swap_entry(entry))))
 | 
			
		||||
		return NULL;
 | 
			
		||||
	page = lookup_swap_cache(entry, vma, faddr);
 | 
			
		||||
	if (page)
 | 
			
		||||
		return page;
 | 
			
		||||
	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
 | 
			
		||||
	entry = pte_to_swp_entry(*pte);
 | 
			
		||||
	if ((unlikely(non_swap_entry(entry)))) {
 | 
			
		||||
		pte_unmap(orig_pte);
 | 
			
		||||
		return;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	fpfn = PFN_DOWN(faddr);
 | 
			
		||||
	swap_ra_info = GET_SWAP_RA_VAL(vma);
 | 
			
		||||
	pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
 | 
			
		||||
	prev_win = SWAP_RA_WIN(swap_ra_info);
 | 
			
		||||
	hits = SWAP_RA_HITS(swap_ra_info);
 | 
			
		||||
	swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
 | 
			
		||||
	ra_val = GET_SWAP_RA_VAL(vma);
 | 
			
		||||
	pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
 | 
			
		||||
	prev_win = SWAP_RA_WIN(ra_val);
 | 
			
		||||
	hits = SWAP_RA_HITS(ra_val);
 | 
			
		||||
	ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
 | 
			
		||||
					       max_win, prev_win);
 | 
			
		||||
	atomic_long_set(&vma->swap_readahead_info,
 | 
			
		||||
			SWAP_RA_VAL(faddr, win, 0));
 | 
			
		||||
 | 
			
		||||
	if (win == 1)
 | 
			
		||||
		return NULL;
 | 
			
		||||
	if (win == 1) {
 | 
			
		||||
		pte_unmap(orig_pte);
 | 
			
		||||
		return;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* Copy the PTEs because the page table may be unmapped */
 | 
			
		||||
	if (fpfn == pfn + 1)
 | 
			
		||||
| 
						 | 
				
			
			@ -703,23 +714,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 | 
			
		|||
		swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
 | 
			
		||||
				  &start, &end);
 | 
			
		||||
	}
 | 
			
		||||
	swap_ra->nr_pte = end - start;
 | 
			
		||||
	swap_ra->offset = fpfn - start;
 | 
			
		||||
	pte = vmf->pte - swap_ra->offset;
 | 
			
		||||
	ra_info->nr_pte = end - start;
 | 
			
		||||
	ra_info->offset = fpfn - start;
 | 
			
		||||
	pte -= ra_info->offset;
 | 
			
		||||
#ifdef CONFIG_64BIT
 | 
			
		||||
	swap_ra->ptes = pte;
 | 
			
		||||
	ra_info->ptes = pte;
 | 
			
		||||
#else
 | 
			
		||||
	tpte = swap_ra->ptes;
 | 
			
		||||
	tpte = ra_info->ptes;
 | 
			
		||||
	for (pfn = start; pfn != end; pfn++)
 | 
			
		||||
		*tpte++ = *pte++;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
	return NULL;
 | 
			
		||||
	pte_unmap(orig_pte);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 | 
			
		||||
				    struct vm_fault *vmf,
 | 
			
		||||
				    struct vma_swap_readahead *swap_ra)
 | 
			
		||||
				    struct vm_fault *vmf)
 | 
			
		||||
{
 | 
			
		||||
	struct blk_plug plug;
 | 
			
		||||
	struct vm_area_struct *vma = vmf->vma;
 | 
			
		||||
| 
						 | 
				
			
			@ -728,12 +737,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 | 
			
		|||
	swp_entry_t entry;
 | 
			
		||||
	unsigned int i;
 | 
			
		||||
	bool page_allocated;
 | 
			
		||||
	struct vma_swap_readahead ra_info = {0,};
 | 
			
		||||
 | 
			
		||||
	if (swap_ra->win == 1)
 | 
			
		||||
	swap_ra_info(vmf, &ra_info);
 | 
			
		||||
	if (ra_info.win == 1)
 | 
			
		||||
		goto skip;
 | 
			
		||||
 | 
			
		||||
	blk_start_plug(&plug);
 | 
			
		||||
	for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte;
 | 
			
		||||
	for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
 | 
			
		||||
	     i++, pte++) {
 | 
			
		||||
		pentry = *pte;
 | 
			
		||||
		if (pte_none(pentry))
 | 
			
		||||
| 
						 | 
				
			
			@ -749,8 +760,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 | 
			
		|||
			continue;
 | 
			
		||||
		if (page_allocated) {
 | 
			
		||||
			swap_readpage(page, false);
 | 
			
		||||
			if (i != swap_ra->offset &&
 | 
			
		||||
			    likely(!PageTransCompound(page))) {
 | 
			
		||||
			if (i != ra_info.offset) {
 | 
			
		||||
				SetPageReadahead(page);
 | 
			
		||||
				count_vm_event(SWAP_RA);
 | 
			
		||||
			}
 | 
			
		||||
| 
						 | 
				
			
			@ -761,7 +771,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 | 
			
		|||
	lru_add_drain();
 | 
			
		||||
skip:
 | 
			
		||||
	return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
 | 
			
		||||
				     swap_ra->win == 1);
 | 
			
		||||
				     ra_info.win == 1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_SYSFS
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue