shmem: add huge pages support
Here's the basic implementation of huge pages support for shmem/tmpfs.
It's all pretty straightforward:
  - shmem_getpage() allocates a huge page if it can, and tries to insert
    it into the radix tree with shmem_add_to_page_cache();
  - shmem_add_to_page_cache() puts the page onto the radix tree if there's
    space for it;
  - shmem_undo_range() removes huge pages if they are fully within the
    range. A partial truncate of a huge page zeroes out that part of the
    THP. This has a visible effect on fallocate(FALLOC_FL_PUNCH_HOLE)
    behaviour: as we don't really create a hole in this case,
    lseek(SEEK_HOLE) may give inconsistent results depending on which
    pages happened to be allocated.
  - no need to change shmem_fault(): core mm will map a compound page as
    huge if the VMA is suitable;
Link: http://lkml.kernel.org/r/1466021202-61880-30-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
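
For illustration only (not part of this patch): a minimal userspace sketch of how the new code path is exercised. It assumes a tmpfs mount with huge pages enabled (for example via the huge=always mount option added elsewhere in this series) at a hypothetical /mnt/huge-tmpfs; the file name is made up too. Once the file is mapped and written, shmem_getpage() can allocate compound pages for it, and core mm can map them PMD-huge on fault if the VMA is suitably aligned.

/*
 * Illustrative userspace sketch, not part of the patch.  The mount point,
 * file name and "huge=always" mount option are assumptions.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define MAP_LEN	(4UL << 20)	/* room for two 2MB THPs on x86-64 */

int main(void)
{
	int fd = open("/mnt/huge-tmpfs/thp-test", O_RDWR | O_CREAT, 0600);
	char *p;

	if (fd < 0 || ftruncate(fd, MAP_LEN) < 0) {
		perror("open/ftruncate");
		return 1;
	}

	p = mmap(NULL, MAP_LEN, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* writing faults the pages in; shmem may back them with huge pages */
	memset(p, 0xaa, MAP_LEN);

	munmap(p, MAP_LEN);
	close(fd);
	return 0;
}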
			
			
parent c01d5b3007
commit 800d8c63b2

9 changed files with 331 additions and 70 deletions
@@ -156,6 +156,8 @@ void put_huge_zero_page(void);
 
 #define transparent_hugepage_enabled(__vma) 0
 
+static inline void prep_transhuge_page(struct page *page) {}
+
 #define transparent_hugepage_flags 0UL
 static inline int
 split_huge_page_to_list(struct page *page, struct list_head *list)

@@ -71,6 +71,9 @@ static inline struct page *shmem_read_mapping_page(
 					mapping_gfp_mask(mapping));
 }
 
+extern bool shmem_charge(struct inode *inode, long pages);
+extern void shmem_uncharge(struct inode *inode, long pages);
+
 #ifdef CONFIG_TMPFS
 
 extern int shmem_add_seals(struct file *file, unsigned int seals);

@@ -219,8 +219,13 @@ void __delete_from_page_cache(struct page *page, void *shadow)
 	/* hugetlb pages do not participate in page cache accounting. */
 	if (!PageHuge(page))
 		__mod_zone_page_state(page_zone(page), NR_FILE_PAGES, -nr);
-	if (PageSwapBacked(page))
+	if (PageSwapBacked(page)) {
 		__mod_zone_page_state(page_zone(page), NR_SHMEM, -nr);
+		if (PageTransHuge(page))
+			__dec_zone_page_state(page, NR_SHMEM_THPS);
+	} else {
+		VM_BUG_ON_PAGE(PageTransHuge(page) && !PageHuge(page), page);
+	}
 
 	/*
 	 * At this point page must be either written or cleaned by truncate.

@@ -3316,6 +3316,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 		if (head[i].index >= end) {
 			__ClearPageDirty(head + i);
 			__delete_from_page_cache(head + i, NULL);
+			if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
+				shmem_uncharge(head->mapping->host, 1);
 			put_page(head + i);
 		}
 	}

@@ -1142,7 +1142,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				 * unmap shared but keep private pages.
 				 */
 				if (details->check_mapping &&
-				    details->check_mapping != page->mapping)
+				    details->check_mapping != page_rmapping(page))
 					continue;
 			}
 			ptent = ptep_get_and_clear_full(mm, addr, pte,

@@ -531,7 +531,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 		nid = page_to_nid(page);
 		if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
 			continue;
-		if (PageTransCompound(page) && PageAnon(page)) {
+		if (PageTransCompound(page)) {
 			get_page(page);
 			pte_unmap_unlock(pte, ptl);
 			lock_page(page);

@@ -2563,6 +2563,7 @@ int set_page_dirty(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
 
+	page = compound_head(page);
 	if (likely(mapping)) {
 		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
 		/*

mm/shmem.c | 380

@@ -173,10 +173,13 @@ static inline int shmem_reacct_size(unsigned long flags,
  * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
  * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
  */
-static inline int shmem_acct_block(unsigned long flags)
+static inline int shmem_acct_block(unsigned long flags, long pages)
 {
-	return (flags & VM_NORESERVE) ?
-		security_vm_enough_memory_mm(current->mm, VM_ACCT(PAGE_SIZE)) : 0;
+	if (!(flags & VM_NORESERVE))
+		return 0;
+
+	return security_vm_enough_memory_mm(current->mm,
+			pages * VM_ACCT(PAGE_SIZE));
 }
 
 static inline void shmem_unacct_blocks(unsigned long flags, long pages)

@@ -249,6 +252,51 @@ static void shmem_recalc_inode(struct inode *inode)
 	}
 }
 
+bool shmem_charge(struct inode *inode, long pages)
+{
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+
+	if (shmem_acct_block(info->flags, pages))
+		return false;
+	spin_lock(&info->lock);
+	info->alloced += pages;
+	inode->i_blocks += pages * BLOCKS_PER_PAGE;
+	shmem_recalc_inode(inode);
+	spin_unlock(&info->lock);
+	inode->i_mapping->nrpages += pages;
+
+	if (!sbinfo->max_blocks)
+		return true;
+	if (percpu_counter_compare(&sbinfo->used_blocks,
+				sbinfo->max_blocks - pages) > 0) {
+		inode->i_mapping->nrpages -= pages;
+		spin_lock(&info->lock);
+		info->alloced -= pages;
+		shmem_recalc_inode(inode);
+		spin_unlock(&info->lock);
+
+		return false;
+	}
+	percpu_counter_add(&sbinfo->used_blocks, pages);
+	return true;
+}
+
+void shmem_uncharge(struct inode *inode, long pages)
+{
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+
+	spin_lock(&info->lock);
+	info->alloced -= pages;
+	inode->i_blocks -= pages * BLOCKS_PER_PAGE;
+	shmem_recalc_inode(inode);
+	spin_unlock(&info->lock);
+
+	if (sbinfo->max_blocks)
+		percpu_counter_sub(&sbinfo->used_blocks, pages);
+}
+
 /*
  * Replace item expected in radix tree by a new item, while holding tree lock.
  */

@@ -376,30 +424,57 @@ static int shmem_add_to_page_cache(struct page *page,
 				   struct address_space *mapping,
 				   pgoff_t index, void *expected)
 {
-	int error;
+	int error, nr = hpage_nr_pages(page);
 
+	VM_BUG_ON_PAGE(PageTail(page), page);
+	VM_BUG_ON_PAGE(index != round_down(index, nr), page);
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
+	VM_BUG_ON(expected && PageTransHuge(page));
 
-	get_page(page);
+	page_ref_add(page, nr);
 	page->mapping = mapping;
 	page->index = index;
 
 	spin_lock_irq(&mapping->tree_lock);
-	if (!expected)
+	if (PageTransHuge(page)) {
+		void __rcu **results;
+		pgoff_t idx;
+		int i;
+
+		error = 0;
+		if (radix_tree_gang_lookup_slot(&mapping->page_tree,
+					&results, &idx, index, 1) &&
+				idx < index + HPAGE_PMD_NR) {
+			error = -EEXIST;
+		}
+
+		if (!error) {
+			for (i = 0; i < HPAGE_PMD_NR; i++) {
+				error = radix_tree_insert(&mapping->page_tree,
+						index + i, page + i);
+				VM_BUG_ON(error);
+			}
+			count_vm_event(THP_FILE_ALLOC);
+		}
+	} else if (!expected) {
 		error = radix_tree_insert(&mapping->page_tree, index, page);
-	else
+	} else {
 		error = shmem_radix_tree_replace(mapping, index, expected,
 								 page);
+	}
+
 	if (!error) {
-		mapping->nrpages++;
-		__inc_zone_page_state(page, NR_FILE_PAGES);
-		__inc_zone_page_state(page, NR_SHMEM);
+		mapping->nrpages += nr;
+		if (PageTransHuge(page))
+			__inc_zone_page_state(page, NR_SHMEM_THPS);
+		__mod_zone_page_state(page_zone(page), NR_FILE_PAGES, nr);
+		__mod_zone_page_state(page_zone(page), NR_SHMEM, nr);
 		spin_unlock_irq(&mapping->tree_lock);
 	} else {
 		page->mapping = NULL;
 		spin_unlock_irq(&mapping->tree_lock);
-		put_page(page);
+		page_ref_sub(page, nr);
 	}
 	return error;
 }

@@ -412,6 +487,8 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap)
 	struct address_space *mapping = page->mapping;
 	int error;
 
+	VM_BUG_ON_PAGE(PageCompound(page), page);
+
 	spin_lock_irq(&mapping->tree_lock);
 	error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
 	page->mapping = NULL;

@@ -591,10 +668,33 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 				continue;
 			}
 
+			VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page);
+
 			if (!trylock_page(page))
 				continue;
+
+			if (PageTransTail(page)) {
+				/* Middle of THP: zero out the page */
+				clear_highpage(page);
+				unlock_page(page);
+				continue;
+			} else if (PageTransHuge(page)) {
+				if (index == round_down(end, HPAGE_PMD_NR)) {
+					/*
+					 * Range ends in the middle of THP:
+					 * zero out the page
+					 */
+					clear_highpage(page);
+					unlock_page(page);
+					continue;
+				}
+				index += HPAGE_PMD_NR - 1;
+				i += HPAGE_PMD_NR - 1;
+			}
+
 			if (!unfalloc || !PageUptodate(page)) {
-				if (page->mapping == mapping) {
+				VM_BUG_ON_PAGE(PageTail(page), page);
+				if (page_mapping(page) == mapping) {
 					VM_BUG_ON_PAGE(PageWriteback(page), page);
 					truncate_inode_page(mapping, page);
 				}

@@ -670,8 +770,36 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			}
 
 			lock_page(page);
+
+			if (PageTransTail(page)) {
+				/* Middle of THP: zero out the page */
+				clear_highpage(page);
+				unlock_page(page);
+				/*
+				 * Partial thp truncate due 'start' in middle
+				 * of THP: don't need to look on these pages
+				 * again on !pvec.nr restart.
+				 */
+				if (index != round_down(end, HPAGE_PMD_NR))
+					start++;
+				continue;
+			} else if (PageTransHuge(page)) {
+				if (index == round_down(end, HPAGE_PMD_NR)) {
+					/*
+					 * Range ends in the middle of THP:
+					 * zero out the page
+					 */
+					clear_highpage(page);
+					unlock_page(page);
+					continue;
+				}
+				index += HPAGE_PMD_NR - 1;
+				i += HPAGE_PMD_NR - 1;
+			}
+
 			if (!unfalloc || !PageUptodate(page)) {
-				if (page->mapping == mapping) {
+				VM_BUG_ON_PAGE(PageTail(page), page);
+				if (page_mapping(page) == mapping) {
 					VM_BUG_ON_PAGE(PageWriteback(page), page);
 					truncate_inode_page(mapping, page);
 				} else {

@@ -929,6 +1057,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	swp_entry_t swap;
 	pgoff_t index;
 
+	VM_BUG_ON_PAGE(PageCompound(page), page);
 	BUG_ON(!PageLocked(page));
 	mapping = page->mapping;
 	index = page->index;

@@ -1065,24 +1194,63 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
 #define vm_policy vm_private_data
 #endif
 
+static void shmem_pseudo_vma_init(struct vm_area_struct *vma,
+		struct shmem_inode_info *info, pgoff_t index)
+{
+	/* Create a pseudo vma that just contains the policy */
+	vma->vm_start = 0;
+	/* Bias interleave by inode number to distribute better across nodes */
+	vma->vm_pgoff = index + info->vfs_inode.i_ino;
+	vma->vm_ops = NULL;
+	vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index);
+}
+
+static void shmem_pseudo_vma_destroy(struct vm_area_struct *vma)
+{
+	/* Drop reference taken by mpol_shared_policy_lookup() */
+	mpol_cond_put(vma->vm_policy);
+}
+
 static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
 			struct shmem_inode_info *info, pgoff_t index)
 {
 	struct vm_area_struct pvma;
 	struct page *page;
 
-	/* Create a pseudo vma that just contains the policy */
-	pvma.vm_start = 0;
-	/* Bias interleave by inode number to distribute better across nodes */
-	pvma.vm_pgoff = index + info->vfs_inode.i_ino;
-	pvma.vm_ops = NULL;
-	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
-
+	shmem_pseudo_vma_init(&pvma, info, index);
 	page = swapin_readahead(swap, gfp, &pvma, 0);
+	shmem_pseudo_vma_destroy(&pvma);
 
-	/* Drop reference taken by mpol_shared_policy_lookup() */
-	mpol_cond_put(pvma.vm_policy);
+	return page;
+}
 
+static struct page *shmem_alloc_hugepage(gfp_t gfp,
+		struct shmem_inode_info *info, pgoff_t index)
+{
+	struct vm_area_struct pvma;
+	struct inode *inode = &info->vfs_inode;
+	struct address_space *mapping = inode->i_mapping;
+	pgoff_t idx, hindex = round_down(index, HPAGE_PMD_NR);
+	void __rcu **results;
+	struct page *page;
+
+	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+		return NULL;
+
+	rcu_read_lock();
+	if (radix_tree_gang_lookup_slot(&mapping->page_tree, &results, &idx,
+				hindex, 1) && idx < hindex + HPAGE_PMD_NR) {
+		rcu_read_unlock();
+		return NULL;
+	}
+	rcu_read_unlock();
+
+	shmem_pseudo_vma_init(&pvma, info, hindex);
+	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
+			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+	shmem_pseudo_vma_destroy(&pvma);
+	if (page)
+		prep_transhuge_page(page);
 	return page;
 }
 

@@ -1092,23 +1260,51 @@ static struct page *shmem_alloc_page(gfp_t gfp,
 	struct vm_area_struct pvma;
 	struct page *page;
 
-	/* Create a pseudo vma that just contains the policy */
-	pvma.vm_start = 0;
-	/* Bias interleave by inode number to distribute better across nodes */
-	pvma.vm_pgoff = index + info->vfs_inode.i_ino;
-	pvma.vm_ops = NULL;
-	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
+	shmem_pseudo_vma_init(&pvma, info, index);
+	page = alloc_page_vma(gfp, &pvma, 0);
+	shmem_pseudo_vma_destroy(&pvma);
 
-	page = alloc_pages_vma(gfp, 0, &pvma, 0, numa_node_id(), false);
+	return page;
+}
+
+static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
+		struct shmem_inode_info *info, struct shmem_sb_info *sbinfo,
+		pgoff_t index, bool huge)
+{
+	struct page *page;
+	int nr;
+	int err = -ENOSPC;
+
+	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+		huge = false;
+	nr = huge ? HPAGE_PMD_NR : 1;
+
+	if (shmem_acct_block(info->flags, nr))
+		goto failed;
+	if (sbinfo->max_blocks) {
+		if (percpu_counter_compare(&sbinfo->used_blocks,
+					sbinfo->max_blocks - nr) > 0)
+			goto unacct;
+		percpu_counter_add(&sbinfo->used_blocks, nr);
+	}
+
+	if (huge)
+		page = shmem_alloc_hugepage(gfp, info, index);
+	else
+		page = shmem_alloc_page(gfp, info, index);
 	if (page) {
 		__SetPageLocked(page);
 		__SetPageSwapBacked(page);
+		return page;
 	}
 
-	/* Drop reference taken by mpol_shared_policy_lookup() */
-	mpol_cond_put(pvma.vm_policy);
-
-	return page;
+	err = -ENOMEM;
+	if (sbinfo->max_blocks)
+		percpu_counter_add(&sbinfo->used_blocks, -nr);
+unacct:
+	shmem_unacct_blocks(info->flags, nr);
+failed:
+	return ERR_PTR(err);
 }
 
 /*

@@ -1213,6 +1409,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	struct mem_cgroup *memcg;
 	struct page *page;
 	swp_entry_t swap;
+	pgoff_t hindex = index;
 	int error;
 	int once = 0;
 	int alloced = 0;

@@ -1334,47 +1531,74 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 		swap_free(swap);
 
 	} else {
-		if (shmem_acct_block(info->flags)) {
-			error = -ENOSPC;
-			goto failed;
-		}
-		if (sbinfo->max_blocks) {
-			if (percpu_counter_compare(&sbinfo->used_blocks,
-						sbinfo->max_blocks) >= 0) {
-				error = -ENOSPC;
-				goto unacct;
-			}
-			percpu_counter_inc(&sbinfo->used_blocks);
+		/* shmem_symlink() */
+		if (mapping->a_ops != &shmem_aops)
+			goto alloc_nohuge;
+		if (shmem_huge == SHMEM_HUGE_DENY)
+			goto alloc_nohuge;
+		if (shmem_huge == SHMEM_HUGE_FORCE)
+			goto alloc_huge;
+		switch (sbinfo->huge) {
+			loff_t i_size;
+			pgoff_t off;
+		case SHMEM_HUGE_NEVER:
+			goto alloc_nohuge;
+		case SHMEM_HUGE_WITHIN_SIZE:
+			off = round_up(index, HPAGE_PMD_NR);
+			i_size = round_up(i_size_read(inode), PAGE_SIZE);
+			if (i_size >= HPAGE_PMD_SIZE &&
+					i_size >> PAGE_SHIFT >= off)
+				goto alloc_huge;
+			/* fallthrough */
+		case SHMEM_HUGE_ADVISE:
+			/* TODO: wire up fadvise()/madvise() */
+			goto alloc_nohuge;
 		}
 
-		page = shmem_alloc_page(gfp, info, index);
-		if (!page) {
-			error = -ENOMEM;
-			goto decused;
+alloc_huge:
+		page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
+				index, true);
+		if (IS_ERR(page)) {
+alloc_nohuge:		page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
+					index, false);
 		}
+		if (IS_ERR(page)) {
+			error = PTR_ERR(page);
+			page = NULL;
+			goto failed;
+		}
+
+		if (PageTransHuge(page))
+			hindex = round_down(index, HPAGE_PMD_NR);
+		else
+			hindex = index;
+
 		if (sgp == SGP_WRITE)
 			__SetPageReferenced(page);
 
 		error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
-				false);
+				PageTransHuge(page));
 		if (error)
-			goto decused;
-		error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
+			goto unacct;
+		error = radix_tree_maybe_preload_order(gfp & GFP_RECLAIM_MASK,
+				compound_order(page));
 		if (!error) {
-			error = shmem_add_to_page_cache(page, mapping, index,
+			error = shmem_add_to_page_cache(page, mapping, hindex,
 							NULL);
 			radix_tree_preload_end();
 		}
 		if (error) {
-			mem_cgroup_cancel_charge(page, memcg, false);
-			goto decused;
+			mem_cgroup_cancel_charge(page, memcg,
+					PageTransHuge(page));
+			goto unacct;
 		}
-		mem_cgroup_commit_charge(page, memcg, false, false);
+		mem_cgroup_commit_charge(page, memcg, false,
+				PageTransHuge(page));
 		lru_cache_add_anon(page);
 
 		spin_lock(&info->lock);
-		info->alloced++;
-		inode->i_blocks += BLOCKS_PER_PAGE;
+		info->alloced += 1 << compound_order(page);
+		inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
 		shmem_recalc_inode(inode);
 		spin_unlock(&info->lock);
 		alloced = true;

@@ -1390,10 +1614,15 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 		 * but SGP_FALLOC on a page fallocated earlier must initialize
 		 * it now, lest undo on failure cancel our earlier guarantee.
 		 */
-		if (sgp != SGP_WRITE) {
-			clear_highpage(page);
-			flush_dcache_page(page);
-			SetPageUptodate(page);
+		if (sgp != SGP_WRITE && !PageUptodate(page)) {
+			struct page *head = compound_head(page);
+			int i;
+
+			for (i = 0; i < (1 << compound_order(head)); i++) {
+				clear_highpage(head + i);
+				flush_dcache_page(head + i);
+			}
+			SetPageUptodate(head);
 		}
 	}
 

@@ -1410,17 +1639,23 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 		error = -EINVAL;
 		goto unlock;
 	}
-	*pagep = page;
+	*pagep = page + index - hindex;
 	return 0;
 
 	/*
 	 * Error recovery.
 	 */
-decused:
-	if (sbinfo->max_blocks)
-		percpu_counter_add(&sbinfo->used_blocks, -1);
 unacct:
-	shmem_unacct_blocks(info->flags, 1);
+	if (sbinfo->max_blocks)
+		percpu_counter_sub(&sbinfo->used_blocks,
+				1 << compound_order(page));
+	shmem_unacct_blocks(info->flags, 1 << compound_order(page));
+
+	if (PageTransHuge(page)) {
+		unlock_page(page);
+		put_page(page);
+		goto alloc_nohuge;
+	}
 failed:
 	if (swap.val && !shmem_confirm_swap(mapping, index, swap))
 		error = -EEXIST;

@@ -1758,12 +1993,23 @@ shmem_write_end(struct file *file, struct address_space *mapping,
 		i_size_write(inode, pos + copied);
 
 	if (!PageUptodate(page)) {
+		struct page *head = compound_head(page);
+		if (PageTransCompound(page)) {
+			int i;
+
+			for (i = 0; i < HPAGE_PMD_NR; i++) {
+				if (head + i == page)
+					continue;
+				clear_highpage(head + i);
+				flush_dcache_page(head + i);
+			}
+		}
 		if (copied < PAGE_SIZE) {
 			unsigned from = pos & (PAGE_SIZE - 1);
 			zero_user_segments(page, 0, from,
 					from + copied, PAGE_SIZE);
 		}
-		SetPageUptodate(page);
+		SetPageUptodate(head);
 	}
 	set_page_dirty(page);
 	unlock_page(page);

@@ -292,6 +292,7 @@ static bool need_activate_page_drain(int cpu)
 
 void activate_page(struct page *page)
 {
+	page = compound_head(page);
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
 

@@ -316,6 +317,7 @@ void activate_page(struct page *page)
 {
 	struct zone *zone = page_zone(page);
 
+	page = compound_head(page);
 	spin_lock_irq(&zone->lru_lock);
 	__activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL);
 	spin_unlock_irq(&zone->lru_lock);