mirror of https://github.com/torvalds/linux.git
	mm: Convert collapse_shmem to XArray
I found another victim of the radix tree being hard to use.  Because
there was no call to radix_tree_preload(), khugepaged was allocating
radix_tree_nodes using GFP_ATOMIC.

I also converted a local_irq_save()/restore() pair to
disable()/enable().

Signed-off-by: Matthew Wilcox <willy@infradead.org>
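For readers new to the XArray API: the reason the conversion can drop GFP_ATOMIC is the xas_nomem() retry idiom, in which node allocation happens outside the spinlock with GFP_KERNEL and the operation is then retried. A minimal sketch of that idiom, not lifted from this patch (`entry` and `index` are placeholders):

	/* requires <linux/xarray.h> */
	XA_STATE(xas, &mapping->i_pages, index);

	do {
		xas_lock_irq(&xas);
		xas_store(&xas, entry);	/* may record -ENOMEM on the xa_state */
		xas_unlock_irq(&xas);
		/* xas_nomem() allocates a node with GFP_KERNEL outside the
		 * lock and returns true if the store should be retried. */
	} while (xas_nomem(&xas, GFP_KERNEL));

The patch itself wraps the same loop around xas_create_range() to prepopulate the whole range before scanning, as seen in the diff below.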
parent aa5dc07f70
commit 77da9389b9

1 changed file with 66 additions and 93 deletions
mm/khugepaged.c | 159 +++++++++++++++++++++++++-----------------------------
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1288,17 +1288,17 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  *
  * Basic scheme is simple, details are more complex:
  *  - allocate and freeze a new huge page;
- *  - scan over radix tree replacing old pages the new one
+ *  - scan page cache replacing old pages with the new one
  *    + swap in pages if necessary;
  *    + fill in gaps;
- *    + keep old pages around in case if rollback is required;
- *  - if replacing succeed:
+ *    + keep old pages around in case rollback is required;
+ *  - if replacing succeeds:
  *    + copy data over;
  *    + free old pages;
  *    + unfreeze huge page;
  *  - if replacing failed;
  *    + put all pages back and unfreeze them;
- *    + restore gaps in the radix-tree;
+ *    + restore gaps in the page cache;
  *    + free huge page;
  */
 static void collapse_shmem(struct mm_struct *mm,
@@ -1306,12 +1306,11 @@ static void collapse_shmem(struct mm_struct *mm,
 		struct page **hpage, int node)
 {
 	gfp_t gfp;
-	struct page *page, *new_page, *tmp;
+	struct page *new_page;
 	struct mem_cgroup *memcg;
 	pgoff_t index, end = start + HPAGE_PMD_NR;
 	LIST_HEAD(pagelist);
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
 	int nr_none = 0, result = SCAN_SUCCEED;
 
 	VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
@@ -1336,48 +1335,49 @@ static void collapse_shmem(struct mm_struct *mm,
 	__SetPageLocked(new_page);
 	BUG_ON(!page_ref_freeze(new_page, 1));
 
-
 	/*
-	 * At this point the new_page is 'frozen' (page_count() is zero), locked
-	 * and not up-to-date. It's safe to insert it into radix tree, because
-	 * nobody would be able to map it or use it in other way until we
-	 * unfreeze it.
+	 * At this point the new_page is 'frozen' (page_count() is zero),
+	 * locked and not up-to-date. It's safe to insert it into the page
+	 * cache, because nobody would be able to map it or use it in other
+	 * way until we unfreeze it.
 	 */
 
-	index = start;
-	xa_lock_irq(&mapping->i_pages);
-	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
-		int n = min(iter.index, end) - index;
-
-		/*
-		 * Handle holes in the radix tree: charge it from shmem and
-		 * insert relevant subpage of new_page into the radix-tree.
-		 */
-		if (n && !shmem_charge(mapping->host, n)) {
-			result = SCAN_FAIL;
+	/* This will be less messy when we use multi-index entries */
+	do {
+		xas_lock_irq(&xas);
+		xas_create_range(&xas);
+		if (!xas_error(&xas))
 			break;
-		}
-		nr_none += n;
-		for (; index < min(iter.index, end); index++) {
-			radix_tree_insert(&mapping->i_pages, index,
-					new_page + (index % HPAGE_PMD_NR));
+		xas_unlock_irq(&xas);
+		if (!xas_nomem(&xas, GFP_KERNEL))
+			goto out;
+	} while (1);
+
+	xas_set(&xas, start);
+	for (index = start; index < end; index++) {
+		struct page *page = xas_next(&xas);
+
+		VM_BUG_ON(index != xas.xa_index);
+		if (!page) {
+			if (!shmem_charge(mapping->host, 1)) {
+				result = SCAN_FAIL;
+				break;
+			}
+			xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
+			nr_none++;
+			continue;
 		}
 
-		/* We are done. */
-		if (index >= end)
-			break;
-
-		page = radix_tree_deref_slot_protected(slot,
-				&mapping->i_pages.xa_lock);
 		if (xa_is_value(page) || !PageUptodate(page)) {
-			xa_unlock_irq(&mapping->i_pages);
+			xas_unlock_irq(&xas);
 			/* swap in or instantiate fallocated page */
 			if (shmem_getpage(mapping->host, index, &page,
 						SGP_NOHUGE)) {
 				result = SCAN_FAIL;
-				goto tree_unlocked;
+				goto xa_unlocked;
 			}
-			xa_lock_irq(&mapping->i_pages);
+			xas_lock_irq(&xas);
+			xas_set(&xas, index);
 		} else if (trylock_page(page)) {
 			get_page(page);
 		} else {
@@ -1397,7 +1397,7 @@ static void collapse_shmem(struct mm_struct *mm,
 			result = SCAN_TRUNCATED;
 			goto out_unlock;
 		}
-		xa_unlock_irq(&mapping->i_pages);
+		xas_unlock_irq(&xas);
 
 		if (isolate_lru_page(page)) {
 			result = SCAN_DEL_PAGE_LRU;
@@ -1407,17 +1407,16 @@ static void collapse_shmem(struct mm_struct *mm,
 		if (page_mapped(page))
 			unmap_mapping_pages(mapping, index, 1, false);
 
-		xa_lock_irq(&mapping->i_pages);
+		xas_lock_irq(&xas);
+		xas_set(&xas, index);
 
-		slot = radix_tree_lookup_slot(&mapping->i_pages, index);
-		VM_BUG_ON_PAGE(page != radix_tree_deref_slot_protected(slot,
-					&mapping->i_pages.xa_lock), page);
+		VM_BUG_ON_PAGE(page != xas_load(&xas), page);
 		VM_BUG_ON_PAGE(page_mapped(page), page);
 
 		/*
 		 * The page is expected to have page_count() == 3:
 		 *  - we hold a pin on it;
-		 *  - one reference from radix tree;
+		 *  - one reference from page cache;
 		 *  - one from isolate_lru_page;
 		 */
 		if (!page_ref_freeze(page, 3)) {
@@ -1432,56 +1431,30 @@ static void collapse_shmem(struct mm_struct *mm,
 		list_add_tail(&page->lru, &pagelist);
 
 		/* Finally, replace with the new page. */
-		radix_tree_replace_slot(&mapping->i_pages, slot,
-				new_page + (index % HPAGE_PMD_NR));
-
-		slot = radix_tree_iter_resume(slot, &iter);
-		index++;
+		xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
 		continue;
 out_lru:
-		xa_unlock_irq(&mapping->i_pages);
+		xas_unlock_irq(&xas);
 		putback_lru_page(page);
 out_isolate_failed:
 		unlock_page(page);
 		put_page(page);
-		goto tree_unlocked;
+		goto xa_unlocked;
 out_unlock:
 		unlock_page(page);
 		put_page(page);
 		break;
 	}
+	xas_unlock_irq(&xas);
 
-	/*
-	 * Handle hole in radix tree at the end of the range.
-	 * This code only triggers if there's nothing in radix tree
-	 * beyond 'end'.
-	 */
-	if (result == SCAN_SUCCEED && index < end) {
-		int n = end - index;
-
-		if (!shmem_charge(mapping->host, n)) {
-			result = SCAN_FAIL;
-			goto tree_locked;
-		}
-
-		for (; index < end; index++) {
-			radix_tree_insert(&mapping->i_pages, index,
-					new_page + (index % HPAGE_PMD_NR));
-		}
-		nr_none += n;
-	}
-
-tree_locked:
-	xa_unlock_irq(&mapping->i_pages);
-tree_unlocked:
-
+xa_unlocked:
 	if (result == SCAN_SUCCEED) {
-		unsigned long flags;
+		struct page *page, *tmp;
 		struct zone *zone = page_zone(new_page);
 
 		/*
-		 * Replacing old pages with new one has succeed, now we need to
-		 * copy the content and free old pages.
+		 * Replacing old pages with new one has succeeded, now we
+		 * need to copy the content and free the old pages.
 		 */
 		list_for_each_entry_safe(page, tmp, &pagelist, lru) {
 			copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
@@ -1495,16 +1468,16 @@ static void collapse_shmem(struct mm_struct *mm,
 			put_page(page);
 		}
 
-		local_irq_save(flags);
+		local_irq_disable();
 		__inc_node_page_state(new_page, NR_SHMEM_THPS);
 		if (nr_none) {
 			__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
 			__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
 		}
-		local_irq_restore(flags);
+		local_irq_enable();
 
 		/*
-		 * Remove pte page tables, so we can re-faulti
+		 * Remove pte page tables, so we can re-fault
 		 * the page as huge.
 		 */
 		retract_page_tables(mapping, start);
@@ -1521,37 +1494,37 @@ static void collapse_shmem(struct mm_struct *mm,
 
 		khugepaged_pages_collapsed++;
 	} else {
-		/* Something went wrong: rollback changes to the radix-tree */
+		struct page *page;
+		/* Something went wrong: roll back page cache changes */
 		shmem_uncharge(mapping->host, nr_none);
-		xa_lock_irq(&mapping->i_pages);
-		radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
-			if (iter.index >= end)
-				break;
+		xas_lock_irq(&xas);
+		xas_set(&xas, start);
+		xas_for_each(&xas, page, end - 1) {
 			page = list_first_entry_or_null(&pagelist,
 					struct page, lru);
-			if (!page || iter.index < page->index) {
+			if (!page || xas.xa_index < page->index) {
 				if (!nr_none)
 					break;
 				nr_none--;
 				/* Put holes back where they were */
-				radix_tree_delete(&mapping->i_pages, iter.index);
+				xas_store(&xas, NULL);
 				continue;
 			}
 
-			VM_BUG_ON_PAGE(page->index != iter.index, page);
+			VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
 
 			/* Unfreeze the page. */
 			list_del(&page->lru);
 			page_ref_unfreeze(page, 2);
-			radix_tree_replace_slot(&mapping->i_pages, slot, page);
-			slot = radix_tree_iter_resume(slot, &iter);
-			xa_unlock_irq(&mapping->i_pages);
+			xas_store(&xas, page);
+			xas_pause(&xas);
+			xas_unlock_irq(&xas);
 			putback_lru_page(page);
 			unlock_page(page);
-			xa_lock_irq(&mapping->i_pages);
+			xas_lock_irq(&xas);
 		}
 		VM_BUG_ON(nr_none);
-		xa_unlock_irq(&mapping->i_pages);
+		xas_unlock_irq(&xas);
 
 		/* Unfreeze new_page, caller would take care about freeing it */
 		page_ref_unfreeze(new_page, 1);
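One detail worth noting in the rollback hunk above: xas_pause() is what makes it legal to drop the lock inside xas_for_each() and resume the walk after relocking. A hedged sketch of that general idiom, separate from this patch (the predicate and work function are hypothetical):

	/* requires <linux/xarray.h> */
	XA_STATE(xas, xa, 0);
	void *entry;

	xas_lock_irq(&xas);
	xas_for_each(&xas, entry, ULONG_MAX) {
		if (must_drop_lock(entry)) {		/* hypothetical predicate */
			xas_pause(&xas);		/* record position first */
			xas_unlock_irq(&xas);
			do_unlocked_work(entry);	/* hypothetical */
			xas_lock_irq(&xas);		/* walk resumes safely */
		}
	}
	xas_unlock_irq(&xas);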