	btrfs: convert the buffer_radix to an xarray
In order to fully utilize xarray tagging to improve writeback we need to
convert the buffer_radix to a proper xarray. This conversion is relatively
straightforward as the radix code uses the xarray underneath. Using xarray
directly allows for quite a lot less code.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 656e9f51de
commit 19d7f65f03

5 changed files with 111 additions and 167 deletions
				
			
fs/btrfs/disk-io.c

@@ -2761,10 +2761,21 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
 	return ret;
 }
 
+/*
+ * Lockdep gets confused between our buffer_tree which requires IRQ locking because
+ * we modify marks in the IRQ context, and our delayed inode xarray which doesn't
+ * have these requirements. Use a class key so lockdep doesn't get them mixed up.
+ */
+static struct lock_class_key buffer_xa_class;
+
 void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 {
 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
-	INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
+
+	/* Use the same flags as mapping->i_pages. */
+	xa_init_flags(&fs_info->buffer_tree, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
+	lockdep_set_class(&fs_info->buffer_tree.xa_lock, &buffer_xa_class);
+
 	INIT_LIST_HEAD(&fs_info->trans_list);
 	INIT_LIST_HEAD(&fs_info->dead_roots);
 	INIT_LIST_HEAD(&fs_info->delayed_iputs);
@@ -2776,7 +2787,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	spin_lock_init(&fs_info->delayed_iput_lock);
 	spin_lock_init(&fs_info->defrag_inodes_lock);
 	spin_lock_init(&fs_info->super_lock);
-	spin_lock_init(&fs_info->buffer_lock);
 	spin_lock_init(&fs_info->unused_bgs_lock);
 	spin_lock_init(&fs_info->treelog_bg_lock);
 	spin_lock_init(&fs_info->zone_active_bgs_lock);
fs/btrfs/extent_io.c

@@ -1866,19 +1866,17 @@ static void set_btree_ioerr(struct extent_buffer *eb)
  * context.
  */
 static struct extent_buffer *find_extent_buffer_nolock(
-		const struct btrfs_fs_info *fs_info, u64 start)
+		struct btrfs_fs_info *fs_info, u64 start)
 {
 	struct extent_buffer *eb;
+	unsigned long index = (start >> fs_info->sectorsize_bits);
 
 	rcu_read_lock();
-	eb = radix_tree_lookup(&fs_info->buffer_radix,
-			       start >> fs_info->sectorsize_bits);
-	if (eb && atomic_inc_not_zero(&eb->refs)) {
-		rcu_read_unlock();
-		return eb;
-	}
+	eb = xa_load(&fs_info->buffer_tree, index);
+	if (eb && !atomic_inc_not_zero(&eb->refs))
+		eb = NULL;
 	rcu_read_unlock();
-	return NULL;
+	return eb;
 }
 
 static void end_bbio_meta_write(struct btrfs_bio *bbio)
@@ -2742,11 +2740,10 @@ static void detach_extent_buffer_folio(const struct extent_buffer *eb, struct fo
 
 	if (!btrfs_meta_is_subpage(fs_info)) {
 		/*
-		 * We do this since we'll remove the pages after we've
-		 * removed the eb from the radix tree, so we could race
-		 * and have this page now attached to the new eb.  So
-		 * only clear folio if it's still connected to
-		 * this eb.
+		 * We do this since we'll remove the pages after we've removed
+		 * the eb from the xarray, so we could race and have this page
+		 * now attached to the new eb.  So only clear folio if it's
+		 * still connected to this eb.
 		 */
 		if (folio_test_private(folio) && folio_get_private(folio) == eb) {
 			BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
@@ -2911,9 +2908,9 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
 {
 	int refs;
 	/*
-	 * The TREE_REF bit is first set when the extent_buffer is added
-	 * to the radix tree. It is also reset, if unset, when a new reference
-	 * is created by find_extent_buffer.
+	 * The TREE_REF bit is first set when the extent_buffer is added to the
+	 * xarray. It is also reset, if unset, when a new reference is created
+	 * by find_extent_buffer.
 	 *
 	 * It is only cleared in two cases: freeing the last non-tree
 	 * reference to the extent_buffer when its STALE bit is set or
@@ -2925,13 +2922,12 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
 	 * conditions between the calls to check_buffer_tree_ref in those
 	 * codepaths and clearing TREE_REF in try_release_extent_buffer.
 	 *
-	 * The actual lifetime of the extent_buffer in the radix tree is
-	 * adequately protected by the refcount, but the TREE_REF bit and
-	 * its corresponding reference are not. To protect against this
-	 * class of races, we call check_buffer_tree_ref from the codepaths
-	 * which trigger io. Note that once io is initiated, TREE_REF can no
-	 * longer be cleared, so that is the moment at which any such race is
-	 * best fixed.
+	 * The actual lifetime of the extent_buffer in the xarray is adequately
+	 * protected by the refcount, but the TREE_REF bit and its corresponding
+	 * reference are not. To protect against this class of races, we call
+	 * check_buffer_tree_ref() from the code paths which trigger io. Note that
+	 * once io is initiated, TREE_REF can no longer be cleared, so that is
+	 * the moment at which any such race is best fixed.
 	 */
 	refs = atomic_read(&eb->refs);
 	if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
@@ -2995,23 +2991,25 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
 		return ERR_PTR(-ENOMEM);
 	eb->fs_info = fs_info;
 again:
-	ret = radix_tree_preload(GFP_NOFS);
-	if (ret) {
-		exists = ERR_PTR(ret);
+	xa_lock_irq(&fs_info->buffer_tree);
+	exists = __xa_cmpxchg(&fs_info->buffer_tree, start >> fs_info->sectorsize_bits,
+			      NULL, eb, GFP_NOFS);
+	if (xa_is_err(exists)) {
+		ret = xa_err(exists);
+		xa_unlock_irq(&fs_info->buffer_tree);
+		btrfs_release_extent_buffer(eb);
+		return ERR_PTR(ret);
+	}
+	if (exists) {
+		if (!atomic_inc_not_zero(&exists->refs)) {
+			/* The extent buffer is being freed, retry. */
+			xa_unlock_irq(&fs_info->buffer_tree);
+			goto again;
+		}
+		xa_unlock_irq(&fs_info->buffer_tree);
 		goto free_eb;
 	}
-	spin_lock(&fs_info->buffer_lock);
-	ret = radix_tree_insert(&fs_info->buffer_radix,
-				start >> fs_info->sectorsize_bits, eb);
-	spin_unlock(&fs_info->buffer_lock);
-	radix_tree_preload_end();
-	if (ret == -EEXIST) {
-		exists = find_extent_buffer(fs_info, start);
-		if (exists)
-			goto free_eb;
-		else
-			goto again;
-	}
+	xa_unlock_irq(&fs_info->buffer_tree);
 	check_buffer_tree_ref(eb);
 
 	return eb;
@@ -3032,9 +3030,9 @@ static struct extent_buffer *grab_extent_buffer(struct btrfs_fs_info *fs_info,
 	lockdep_assert_held(&folio->mapping->i_private_lock);
 
 	/*
-	 * For subpage case, we completely rely on radix tree to ensure we
-	 * don't try to insert two ebs for the same bytenr.  So here we always
-	 * return NULL and just continue.
+	 * For subpage case, we completely rely on xarray to ensure we don't try
+	 * to insert two ebs for the same bytenr.  So here we always return NULL
+	 * and just continue.
 	 */
 	if (btrfs_meta_is_subpage(fs_info))
 		return NULL;
@@ -3165,7 +3163,7 @@ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
 	/*
 	 * To inform we have an extra eb under allocation, so that
 	 * detach_extent_buffer_page() won't release the folio private when the
-	 * eb hasn't been inserted into radix tree yet.
+	 * eb hasn't been inserted into the xarray yet.
 	 *
 	 * The ref will be decreased when the eb releases the page, in
 	 * detach_extent_buffer_page().  Thus needs no special handling in the
@@ -3299,10 +3297,9 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 
 		/*
 		 * We can't unlock the pages just yet since the extent buffer
-		 * hasn't been properly inserted in the radix tree, this
-		 * opens a race with btree_release_folio which can free a page
-		 * while we are still filling in all pages for the buffer and
-		 * we could crash.
+		 * hasn't been properly inserted into the xarray, this opens a
+		 * race with btree_release_folio() which can free a page while we
+		 * are still filling in all pages for the buffer and we could crash.
 		 */
 	}
 	if (uptodate)
@@ -3311,23 +3308,25 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	if (page_contig)
 		eb->addr = folio_address(eb->folios[0]) + offset_in_page(eb->start);
 again:
-	ret = radix_tree_preload(GFP_NOFS);
-	if (ret)
+	xa_lock_irq(&fs_info->buffer_tree);
+	existing_eb = __xa_cmpxchg(&fs_info->buffer_tree,
+				   start >> fs_info->sectorsize_bits, NULL, eb,
+				   GFP_NOFS);
+	if (xa_is_err(existing_eb)) {
+		ret = xa_err(existing_eb);
+		xa_unlock_irq(&fs_info->buffer_tree);
 		goto out;
-
-	spin_lock(&fs_info->buffer_lock);
-	ret = radix_tree_insert(&fs_info->buffer_radix,
-				start >> fs_info->sectorsize_bits, eb);
-	spin_unlock(&fs_info->buffer_lock);
-	radix_tree_preload_end();
-	if (ret == -EEXIST) {
-		ret = 0;
-		existing_eb = find_extent_buffer(fs_info, start);
-		if (existing_eb)
-			goto out;
-		else
-			goto again;
 	}
+	if (existing_eb) {
+		if (!atomic_inc_not_zero(&existing_eb->refs)) {
+			xa_unlock_irq(&fs_info->buffer_tree);
+			goto again;
+		}
+		xa_unlock_irq(&fs_info->buffer_tree);
+		goto out;
+	}
+	xa_unlock_irq(&fs_info->buffer_tree);
+
 	/* add one reference for the tree */
 	check_buffer_tree_ref(eb);
 
@@ -3397,10 +3396,23 @@ static int release_extent_buffer(struct extent_buffer *eb)
 
 		spin_unlock(&eb->refs_lock);
 
-		spin_lock(&fs_info->buffer_lock);
-		radix_tree_delete_item(&fs_info->buffer_radix,
-				       eb->start >> fs_info->sectorsize_bits, eb);
-		spin_unlock(&fs_info->buffer_lock);
+		/*
+		 * We're erasing, theoretically there will be no allocations, so
+		 * just use GFP_ATOMIC.
+		 *
+		 * We use cmpxchg instead of erase because we do not know if
+		 * this eb is actually in the tree or not, we could be cleaning
+		 * up an eb that we allocated but never inserted into the tree.
+		 * Thus use cmpxchg to remove it from the tree if it is there,
+		 * or leave the other entry if this isn't in the tree.
+		 *
+		 * The documentation says that putting a NULL value is the same
+		 * as erase as long as XA_FLAGS_ALLOC is not set, which it isn't
+		 * in this case.
+		 */
+		xa_cmpxchg_irq(&fs_info->buffer_tree,
+			       eb->start >> fs_info->sectorsize_bits, eb, NULL,
+			       GFP_ATOMIC);
 
 		btrfs_leak_debug_del_eb(eb);
 		/* Should be safe to release folios at this point. */
@@ -4231,71 +4243,17 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
 	}
 }
 
-#define GANG_LOOKUP_SIZE	16
-static struct extent_buffer *get_next_extent_buffer(
-		const struct btrfs_fs_info *fs_info, struct folio *folio, u64 bytenr)
-{
-	struct extent_buffer *gang[GANG_LOOKUP_SIZE];
-	struct extent_buffer *found = NULL;
-	u64 folio_start = folio_pos(folio);
-	u64 cur = folio_start;
-
-	ASSERT(in_range(bytenr, folio_start, PAGE_SIZE));
-	lockdep_assert_held(&fs_info->buffer_lock);
-
-	while (cur < folio_start + PAGE_SIZE) {
-		int ret;
-		int i;
-
-		ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
-				(void **)gang, cur >> fs_info->sectorsize_bits,
-				min_t(unsigned int, GANG_LOOKUP_SIZE,
-				      PAGE_SIZE / fs_info->nodesize));
-		if (ret == 0)
-			goto out;
-		for (i = 0; i < ret; i++) {
-			/* Already beyond page end */
-			if (gang[i]->start >= folio_start + PAGE_SIZE)
-				goto out;
-			/* Found one */
-			if (gang[i]->start >= bytenr) {
-				found = gang[i];
-				goto out;
-			}
-		}
-		cur = gang[ret - 1]->start + gang[ret - 1]->len;
-	}
-out:
-	return found;
-}
-
 static int try_release_subpage_extent_buffer(struct folio *folio)
 {
 	struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
-	u64 cur = folio_pos(folio);
-	const u64 end = cur + PAGE_SIZE;
+	struct extent_buffer *eb;
+	unsigned long start = (folio_pos(folio) >> fs_info->sectorsize_bits);
+	unsigned long index = start;
+	unsigned long end = index + (PAGE_SIZE >> fs_info->sectorsize_bits) - 1;
 	int ret;
 
-	while (cur < end) {
-		struct extent_buffer *eb = NULL;
-
-		/*
-		 * Unlike try_release_extent_buffer() which uses folio private
-		 * to grab buffer, for subpage case we rely on radix tree, thus
-		 * we need to ensure radix tree consistency.
-		 *
-		 * We also want an atomic snapshot of the radix tree, thus go
-		 * with spinlock rather than RCU.
-		 */
-		spin_lock(&fs_info->buffer_lock);
-		eb = get_next_extent_buffer(fs_info, folio, cur);
-		if (!eb) {
-			/* No more eb in the page range after or at cur */
-			spin_unlock(&fs_info->buffer_lock);
-			break;
-		}
-		cur = eb->start + eb->len;
-
+	xa_lock_irq(&fs_info->buffer_tree);
+	xa_for_each_range(&fs_info->buffer_tree, index, eb, start, end) {
 		/*
 		 * The same as try_release_extent_buffer(), to ensure the eb
 		 * won't disappear out from under us.
@@ -4303,10 +4261,9 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
 		spin_lock(&eb->refs_lock);
 		if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
 			spin_unlock(&eb->refs_lock);
-			spin_unlock(&fs_info->buffer_lock);
-			break;
+			continue;
 		}
-		spin_unlock(&fs_info->buffer_lock);
+		xa_unlock_irq(&fs_info->buffer_tree);
 
 		/*
 		 * If tree ref isn't set then we know the ref on this eb is a
@@ -4324,7 +4281,10 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
 		 * release_extent_buffer() will release the refs_lock.
 		 */
 		release_extent_buffer(eb);
+		xa_lock_irq(&fs_info->buffer_tree);
 	}
+	xa_unlock_irq(&fs_info->buffer_tree);
+
 	/*
 	 * Finally to check if we have cleared folio private, as if we have
 	 * released all ebs in the page, the folio private should be cleared now.
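The large deletion above removes get_next_extent_buffer(): with the radix tree, walking the extent buffers that share one folio required a manual radix_tree_gang_lookup() loop, whereas the xarray offers range iteration directly, which is what try_release_subpage_extent_buffer() now uses. A minimal sketch of that iteration pattern, with hypothetical names rather than the patch code:

	#include <linux/xarray.h>

	/*
	 * Visit every entry whose index lies in [start, end] while holding the
	 * xa_lock for a consistent view; drop and retake the lock around any
	 * per-entry work that may sleep, as the patch does around
	 * release_extent_buffer().
	 */
	static void visit_range(struct xarray *xa, unsigned long start, unsigned long end)
	{
		unsigned long index;
		void *entry;

		xa_lock_irq(xa);
		xa_for_each_range(xa, index, entry, start, end) {
			/* per-entry work goes here */
		}
		xa_unlock_irq(xa);
	}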
fs/btrfs/fs.h

@@ -777,10 +777,8 @@ struct btrfs_fs_info {
 
 	struct btrfs_delayed_root *delayed_root;
 
-	/* Extent buffer radix tree */
-	spinlock_t buffer_lock;
 	/* Entries are eb->start / sectorsize */
-	struct radix_tree_root buffer_radix;
+	struct xarray buffer_tree;
 
 	/* Next backup root to be overwritten */
 	int backup_root_index;
fs/btrfs/tests/btrfs-tests.c

@@ -157,9 +157,9 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
 
 void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
 {
-	struct radix_tree_iter iter;
-	void **slot;
 	struct btrfs_device *dev, *tmp;
+	struct extent_buffer *eb;
+	unsigned long index;
 
 	if (!fs_info)
 		return;
@@ -169,25 +169,13 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
 
 	test_mnt->mnt_sb->s_fs_info = NULL;
 
-	spin_lock(&fs_info->buffer_lock);
-	radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
-		struct extent_buffer *eb;
-
-		eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock);
-		if (!eb)
-			continue;
-		/* Shouldn't happen but that kind of thinking creates CVE's */
-		if (radix_tree_exception(eb)) {
-			if (radix_tree_deref_retry(eb))
-				slot = radix_tree_iter_retry(&iter);
-			continue;
-		}
-		slot = radix_tree_iter_resume(slot, &iter);
-		spin_unlock(&fs_info->buffer_lock);
-		free_extent_buffer_stale(eb);
-		spin_lock(&fs_info->buffer_lock);
+	xa_lock_irq(&fs_info->buffer_tree);
+	xa_for_each(&fs_info->buffer_tree, index, eb) {
+		xa_unlock_irq(&fs_info->buffer_tree);
+		free_extent_buffer(eb);
+		xa_lock_irq(&fs_info->buffer_tree);
 	}
-	spin_unlock(&fs_info->buffer_lock);
+	xa_unlock_irq(&fs_info->buffer_tree);
 
 	btrfs_mapping_tree_free(fs_info);
 	list_for_each_entry_safe(dev, tmp, &fs_info->fs_devices->devices,
fs/btrfs/zoned.c

@@ -2171,27 +2171,15 @@ static void wait_eb_writebacks(struct btrfs_block_group *block_group)
 {
 	struct btrfs_fs_info *fs_info = block_group->fs_info;
 	const u64 end = block_group->start + block_group->length;
-	struct radix_tree_iter iter;
 	struct extent_buffer *eb;
-	void __rcu **slot;
+	unsigned long index, start = (block_group->start >> fs_info->sectorsize_bits);
 
 	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter,
-				 block_group->start >> fs_info->sectorsize_bits) {
-		eb = radix_tree_deref_slot(slot);
-		if (!eb)
-			continue;
-		if (radix_tree_deref_retry(eb)) {
-			slot = radix_tree_iter_retry(&iter);
-			continue;
-		}
-
+	xa_for_each_start(&fs_info->buffer_tree, index, eb, start) {
 		if (eb->start < block_group->start)
 			continue;
 		if (eb->start >= end)
 			break;
-
-		slot = radix_tree_iter_resume(slot, &iter);
 		rcu_read_unlock();
 		wait_on_extent_buffer_writeback(eb);
 		rcu_read_lock();