Mirror of https://github.com/torvalds/linux.git (synced 2025-11-04 02:30:34 +02:00)
bcachefs: BTREE_ITER_WITH_JOURNAL

This adds a new btree iterator flag, BTREE_ITER_WITH_JOURNAL, that is
automatically enabled when initializing a btree iterator before journal
replay has completed - it overlays the contents of the journal with the
btree.

This lets us delete bch2_btree_and_journal_walk() and just use the
normal btree iterator interface instead - which also lets us delete a
significant amount of duplicated code.

Note that BTREE_ITER_WITH_JOURNAL is still unoptimized in this patch -
we're redoing the binary search over keys in the journal every time we
call bch2_btree_iter_peek().

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
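To make the overlay semantics concrete, here is a minimal, self-contained C sketch of the idea described above - it is not bcachefs code, and every name in it (model_key, model_peek, the sample arrays) is invented for illustration. Peek returns whichever of the next btree key or next journal key sorts first at or after the search position, with the journal winning ties, which is how journal updates shadow stale btree contents before replay has rewritten the btree (the real implementation is btree_trans_peek_journal() in the diff below, which also skips keys marked overwritten and uses a binary search rather than a linear scan).

#include <stddef.h>
#include <stdio.h>

/* Toy model: a key is just an integer position with a value. */
struct model_key { int pos; const char *val; };

/*
 * Return the first key at or after search_pos, preferring the journal
 * when both sides have a key at the same position - a simplified model
 * of how BTREE_ITER_WITH_JOURNAL overlays journal keys on the btree.
 */
static const struct model_key *
model_peek(const struct model_key *btree, size_t nr_btree,
	   const struct model_key *journal, size_t nr_journal,
	   int search_pos)
{
	const struct model_key *b = NULL, *j = NULL;
	size_t i;

	for (i = 0; i < nr_btree; i++)		/* first btree key >= search_pos */
		if (btree[i].pos >= search_pos) { b = &btree[i]; break; }
	for (i = 0; i < nr_journal; i++)	/* first journal key >= search_pos */
		if (journal[i].pos >= search_pos) { j = &journal[i]; break; }

	if (!b) return j;
	if (!j) return b;
	return j->pos <= b->pos ? j : b;	/* journal shadows the btree on ties */
}

int main(void)
{
	const struct model_key btree[]   = { { 1, "old@1" }, { 3, "old@3" } };
	const struct model_key journal[] = { { 3, "new@3" }, { 5, "new@5" } };
	const struct model_key *k = model_peek(btree, 2, journal, 2, 2);

	/* Prints "3: new@3" - the journal's version of key 3 wins. */
	printf("%d: %s\n", k->pos, k->val);
	return 0;
}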
parent f28620c108
commit 5222a4607c

10 changed files with 344 additions and 366 deletions
fs/bcachefs/alloc_background.c
@@ -340,46 +340,46 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
 #undef  x
 }
 
-static int bch2_alloc_read_fn(struct btree_trans *trans, struct bkey_s_c k)
-{
-	struct bch_fs *c = trans->c;
-	struct bch_dev *ca;
-	struct bucket *g;
-	struct bkey_alloc_unpacked u;
-
-	if (!bkey_is_alloc(k.k))
-		return 0;
-
-	ca = bch_dev_bkey_exists(c, k.k->p.inode);
-	g = bucket(ca, k.k->p.offset);
-	u = bch2_alloc_unpack(k);
-
-	*bucket_gen(ca, k.k->p.offset) = u.gen;
-	g->_mark.gen		= u.gen;
-	g->_mark.data_type	= u.data_type;
-	g->_mark.dirty_sectors	= u.dirty_sectors;
-	g->_mark.cached_sectors	= u.cached_sectors;
-	g->_mark.stripe		= u.stripe != 0;
-	g->stripe		= u.stripe;
-	g->stripe_redundancy	= u.stripe_redundancy;
-	g->io_time[READ]	= u.read_time;
-	g->io_time[WRITE]	= u.write_time;
-	g->oldest_gen		= u.oldest_gen;
-	g->gen_valid		= 1;
-
-	return 0;
-}
-
 int bch2_alloc_read(struct bch_fs *c)
 {
 	struct btree_trans trans;
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	struct bch_dev *ca;
+	struct bucket *g;
+	struct bkey_alloc_unpacked u;
 	int ret;
 
 	bch2_trans_init(&trans, c, 0, 0);
 	down_read(&c->gc_lock);
-	ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_alloc, bch2_alloc_read_fn);
+
+	for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+			   BTREE_ITER_PREFETCH, k, ret) {
+		if (!bkey_is_alloc(k.k))
+			continue;
+
+		ca = bch_dev_bkey_exists(c, k.k->p.inode);
+		g = bucket(ca, k.k->p.offset);
+		u = bch2_alloc_unpack(k);
+
+		*bucket_gen(ca, k.k->p.offset) = u.gen;
+		g->_mark.gen		= u.gen;
+		g->_mark.data_type	= u.data_type;
+		g->_mark.dirty_sectors	= u.dirty_sectors;
+		g->_mark.cached_sectors	= u.cached_sectors;
+		g->_mark.stripe		= u.stripe != 0;
+		g->stripe		= u.stripe;
+		g->stripe_redundancy	= u.stripe_redundancy;
+		g->io_time[READ]	= u.read_time;
+		g->io_time[WRITE]	= u.write_time;
+		g->oldest_gen		= u.oldest_gen;
+		g->gen_valid		= 1;
+	}
+	bch2_trans_iter_exit(&trans, &iter);
+
 	up_read(&c->gc_lock);
 	bch2_trans_exit(&trans);
+
 	if (ret) {
 		bch_err(c, "error reading alloc info: %i", ret);
 		return ret;
fs/bcachefs/bcachefs.h
@@ -860,7 +860,6 @@ mempool_t		bio_bounce_pages;
 	u64			reflink_hint;
 	reflink_gc_table	reflink_gc_table;
 	size_t			reflink_gc_nr;
-	size_t			reflink_gc_idx;
 
 	/* VFS IO PATH - fs-io.c */
 	struct bio_set		writepage_bioset;
fs/bcachefs/btree_gc.c
@@ -1342,59 +1342,6 @@ static int bch2_gc_start(struct bch_fs *c,
 	return 0;
 }
 
-static int bch2_gc_reflink_done_initial_fn(struct btree_trans *trans,
-					   struct bkey_s_c k)
-{
-	struct bch_fs *c = trans->c;
-	struct reflink_gc *r;
-	const __le64 *refcount = bkey_refcount_c(k);
-	char buf[200];
-	int ret = 0;
-
-	if (!refcount)
-		return 0;
-
-	r = genradix_ptr(&c->reflink_gc_table, c->reflink_gc_idx++);
-	if (!r)
-		return -ENOMEM;
-
-	if (!r ||
-	    r->offset != k.k->p.offset ||
-	    r->size != k.k->size) {
-		bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
-		return -EINVAL;
-	}
-
-	if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
-			"reflink key has wrong refcount:\n"
-			"  %s\n"
-			"  should be %u",
-			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
-			r->refcount)) {
-		struct bkey_i *new;
-
-		new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
-		if (!new) {
-			ret = -ENOMEM;
-			goto fsck_err;
-		}
-
-		bkey_reassemble(new, k);
-
-		if (!r->refcount) {
-			new->k.type = KEY_TYPE_deleted;
-			new->k.size = 0;
-		} else {
-			*bkey_refcount(new) = cpu_to_le64(r->refcount);
-		}
-
-		ret = bch2_journal_key_insert(c, BTREE_ID_reflink, 0, new);
-		kfree(new);
-	}
-fsck_err:
-	return ret;
-}
-
 static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
 				bool metadata_only)
 {
@@ -1411,14 +1358,6 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
 
 	bch2_trans_init(&trans, c, 0, 0);
 
-	if (initial) {
-		c->reflink_gc_idx = 0;
-
-		ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_reflink,
-				bch2_gc_reflink_done_initial_fn);
-		goto out;
-	}
-
 	for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
 			   BTREE_ITER_PREFETCH, k, ret) {
 		const __le64 *refcount = bkey_refcount_c(k);
@@ -1426,7 +1365,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
 		if (!refcount)
 			continue;
 
-		r = genradix_ptr(&c->reflink_gc_table, idx);
+		r = genradix_ptr(&c->reflink_gc_table, idx++);
 		if (!r ||
 		    r->offset != k.k->p.offset ||
 		    r->size != k.k->size) {
@@ -1456,7 +1395,9 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
 			else
 				*bkey_refcount(new) = cpu_to_le64(r->refcount);
 
-			ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+			ret = initial
+			       ? bch2_journal_key_insert(c, BTREE_ID_stripes, 0, new)
+			       : __bch2_trans_do(&trans, NULL, NULL, 0,
 					__bch2_btree_insert(&trans, BTREE_ID_reflink, new));
 			kfree(new);
 
@@ -1466,64 +1407,21 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
 	}
 fsck_err:
 	bch2_trans_iter_exit(&trans, &iter);
-out:
 	c->reflink_gc_nr = 0;
 	bch2_trans_exit(&trans);
 	return ret;
 }
 
-static int bch2_gc_stripes_done_initial_fn(struct btree_trans *trans,
-					   struct bkey_s_c k)
-{
-	struct bch_fs *c = trans->c;
-	struct gc_stripe *m;
-	const struct bch_stripe *s;
-	char buf[200];
-	unsigned i;
-	int ret = 0;
-
-	if (k.k->type != KEY_TYPE_stripe)
-		return 0;
-
-	s = bkey_s_c_to_stripe(k).v;
-
-	m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
-
-	for (i = 0; i < s->nr_blocks; i++)
-		if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
-			goto inconsistent;
-	return 0;
-inconsistent:
-	if (fsck_err_on(true, c,
-			"stripe has wrong block sector count %u:\n"
-			"  %s\n"
-			"  should be %u", i,
-			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
-			m ? m->block_sectors[i] : 0)) {
-		struct bkey_i_stripe *new;
-
-		new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
-		if (!new) {
-			ret = -ENOMEM;
-			goto fsck_err;
-		}
-
-		bkey_reassemble(&new->k_i, k);
-
-		for (i = 0; i < new->v.nr_blocks; i++)
-			stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
-
-		ret = bch2_journal_key_insert(c, BTREE_ID_stripes, 0, &new->k_i);
-		kfree(new);
-	}
-fsck_err:
-	return ret;
-}
-
 static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
 				bool metadata_only)
 {
 	struct btree_trans trans;
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	struct gc_stripe *m;
+	const struct bch_stripe *s;
+	char buf[200];
+	unsigned i;
 	int ret = 0;
 
 	if (metadata_only)
@@ -1531,39 +1429,52 @@ static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
 
 	bch2_trans_init(&trans, c, 0, 0);
 
-	if (initial) {
-		ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_stripes,
-				bch2_gc_stripes_done_initial_fn);
-	} else {
-		BUG();
+	for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
+			   BTREE_ITER_PREFETCH, k, ret) {
+		if (k.k->type != KEY_TYPE_stripe)
+			continue;
+
+		s = bkey_s_c_to_stripe(k).v;
+		m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
+
+		for (i = 0; i < s->nr_blocks; i++)
+			if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
+				goto inconsistent;
+		continue;
+inconsistent:
+		if (fsck_err_on(true, c,
+				"stripe has wrong block sector count %u:\n"
+				"  %s\n"
+				"  should be %u", i,
+				(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
+				m ? m->block_sectors[i] : 0)) {
+			struct bkey_i_stripe *new;
+
+			new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
+			if (!new) {
+				ret = -ENOMEM;
+				break;
+			}
+
+			bkey_reassemble(&new->k_i, k);
+
+			for (i = 0; i < new->v.nr_blocks; i++)
+				stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
+
+			ret = initial
+				? bch2_journal_key_insert(c, BTREE_ID_stripes, 0, &new->k_i)
+				: __bch2_trans_do(&trans, NULL, NULL, 0,
+					__bch2_btree_insert(&trans, BTREE_ID_reflink, &new->k_i));
+			kfree(new);
+		}
 	}
+fsck_err:
+	bch2_trans_iter_exit(&trans, &iter);
 
 	bch2_trans_exit(&trans);
 	return ret;
 }
 
-static int bch2_gc_reflink_start_initial_fn(struct btree_trans *trans,
-					    struct bkey_s_c k)
-{
-
-	struct bch_fs *c = trans->c;
-	struct reflink_gc *r;
-	const __le64 *refcount = bkey_refcount_c(k);
-
-	if (!refcount)
-		return 0;
-
-	r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
-			       GFP_KERNEL);
-	if (!r)
-		return -ENOMEM;
-
-	r->offset	= k.k->p.offset;
-	r->size		= k.k->size;
-	r->refcount	= 0;
-	return 0;
-}
-
 static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
 				 bool metadata_only)
 {
@@ -1579,12 +1490,6 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
 	bch2_trans_init(&trans, c, 0, 0);
 	c->reflink_gc_nr = 0;
 
-	if (initial) {
-		ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_reflink,
-					bch2_gc_reflink_start_initial_fn);
-		goto out;
-	}
-
 	for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
 			   BTREE_ITER_PREFETCH, k, ret) {
 		const __le64 *refcount = bkey_refcount_c(k);
@@ -1604,7 +1509,7 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
 		r->refcount	= 0;
 	}
 	bch2_trans_iter_exit(&trans, &iter);
-out:
+
 	bch2_trans_exit(&trans);
 	return ret;
 }
fs/bcachefs/btree_iter.c
@@ -12,6 +12,7 @@
 #include "error.h"
 #include "extents.h"
 #include "journal.h"
+#include "recovery.h"
 #include "replicas.h"
 #include "subvolume.h"
 #include "trace.h"
@@ -1064,6 +1065,7 @@ static inline bool btree_path_advance_to_pos(struct btree_path *path,
 static void btree_path_verify_new_node(struct btree_trans *trans,
 				       struct btree_path *path, struct btree *b)
 {
+	struct bch_fs *c = trans->c;
 	struct btree_path_level *l;
 	unsigned plevel;
 	bool parent_locked;
@@ -1072,6 +1074,9 @@ static void btree_path_verify_new_node(struct btree_trans *trans,
 	if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
 		return;
 
+	if (trans->journal_replay_not_finished)
+		return;
+
 	plevel = b->c.level + 1;
 	if (!btree_path_node(path, plevel))
 		return;
@@ -1092,7 +1097,7 @@ static void btree_path_verify_new_node(struct btree_trans *trans,
 		char buf4[100];
 		struct bkey uk = bkey_unpack_key(b, k);
 
-		bch2_dump_btree_node(trans->c, l->b);
+		bch2_dump_btree_node(c, l->b);
 		bch2_bpos_to_text(&PBUF(buf1), path->pos);
 		bch2_bkey_to_text(&PBUF(buf2), &uk);
 		bch2_bpos_to_text(&PBUF(buf3), b->data->min_key);
@@ -1283,6 +1288,41 @@ static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *path
 	return ret;
 }
 
+static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *path,
+				 struct btree_and_journal_iter *jiter)
+{
+	struct bch_fs *c = trans->c;
+	struct bkey_s_c k;
+	struct bkey_buf tmp;
+	unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
+		? (path->level > 1 ? 0 :  2)
+		: (path->level > 1 ? 1 : 16);
+	bool was_locked = btree_node_locked(path, path->level);
+	int ret = 0;
+
+	bch2_bkey_buf_init(&tmp);
+
+	while (nr && !ret) {
+		if (!bch2_btree_node_relock(trans, path, path->level))
+			break;
+
+		bch2_btree_and_journal_iter_advance(jiter);
+		k = bch2_btree_and_journal_iter_peek(jiter);
+		if (!k.k)
+			break;
+
+		bch2_bkey_buf_reassemble(&tmp, c, k);
+		ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
+					       path->level - 1);
+	}
+
+	if (!was_locked)
+		btree_node_unlock(path, path->level);
+
+	bch2_bkey_buf_exit(&tmp, c);
+	return ret;
+}
+
 static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
 					    struct btree_path *path,
 					    unsigned plevel, struct btree *b)
@@ -1305,6 +1345,30 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
 		btree_node_unlock(path, plevel);
 }
 
+static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
+						     struct btree_path *path,
+						     unsigned flags,
+						     struct bkey_buf *out)
+{
+	struct bch_fs *c = trans->c;
+	struct btree_path_level *l = path_l(path);
+	struct btree_and_journal_iter jiter;
+	struct bkey_s_c k;
+	int ret = 0;
+
+	__bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);
+
+	k = bch2_btree_and_journal_iter_peek(&jiter);
+
+	bch2_bkey_buf_reassemble(out, c, k);
+
+	if (flags & BTREE_ITER_PREFETCH)
+		ret = btree_path_prefetch_j(trans, path, &jiter);
+
+	bch2_btree_and_journal_iter_exit(&jiter);
+	return ret;
+}
+
 static __always_inline int btree_path_down(struct btree_trans *trans,
 					   struct btree_path *path,
 					   unsigned flags,
@@ -1321,8 +1385,21 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
 	EBUG_ON(!btree_node_locked(path, path->level));
 
 	bch2_bkey_buf_init(&tmp);
-	bch2_bkey_buf_unpack(&tmp, c, l->b,
-			 bch2_btree_node_iter_peek(&l->iter, l->b));
+
+	if (unlikely(trans->journal_replay_not_finished)) {
+		ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp);
+		if (ret)
+			goto err;
+	} else {
+		bch2_bkey_buf_unpack(&tmp, c, l->b,
+				 bch2_btree_node_iter_peek(&l->iter, l->b));
+
+		if (flags & BTREE_ITER_PREFETCH) {
+			ret = btree_path_prefetch(trans, path);
+			if (ret)
+				goto err;
+		}
+	}
 
 	b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
 	ret = PTR_ERR_OR_ZERO(b);
@@ -1332,13 +1409,11 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
 	mark_btree_node_locked(path, level, lock_type);
 	btree_path_level_init(trans, path, b);
 
-	if (tmp.k->k.type == KEY_TYPE_btree_ptr_v2 &&
+	if (likely(!trans->journal_replay_not_finished &&
+		   tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
 	    unlikely(b != btree_node_mem_ptr(tmp.k)))
 		btree_node_mem_ptr_set(trans, path, level + 1, b);
 
-	if (flags & BTREE_ITER_PREFETCH)
-		ret = btree_path_prefetch(trans, path);
-
 	if (btree_node_read_locked(path, level + 1))
 		btree_node_unlock(path, level + 1);
 	path->level = level;
@@ -2113,6 +2188,55 @@ struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter)
 	return ret;
 }
 
+static struct bkey_i *__btree_trans_peek_journal(struct btree_trans *trans,
+						 struct btree_path *path)
+{
+	struct journal_keys *keys = &trans->c->journal_keys;
+	size_t idx = bch2_journal_key_search(keys, path->btree_id,
+					     path->level, path->pos);
+
+	while (idx < keys->nr && keys->d[idx].overwritten)
+		idx++;
+
+	return (idx < keys->nr &&
+		keys->d[idx].btree_id	== path->btree_id &&
+		keys->d[idx].level	== path->level)
+		? keys->d[idx].k
+		: NULL;
+}
+
+static noinline
+struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
+					      struct btree_iter *iter)
+{
+	struct bkey_i *k = __btree_trans_peek_journal(trans, iter->path);
+
+	if (k && !bpos_cmp(k->k.p, iter->pos)) {
+		iter->k = k->k;
+		return bkey_i_to_s_c(k);
+	} else {
+		return bkey_s_c_null;
+	}
+}
+
+static noinline
+struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
+					 struct btree_iter *iter,
+					 struct bkey_s_c k)
+{
+	struct bkey_i *next_journal =
+		__btree_trans_peek_journal(trans, iter->path);
+
+	if (next_journal &&
+	    bpos_cmp(next_journal->k.p,
+		     k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
+		iter->k = next_journal->k;
+		k = bkey_i_to_s_c(next_journal);
+	}
+
+	return k;
+}
+
 /**
  * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
  * current position
@@ -2141,16 +2265,12 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 			goto out;
 		}
 
-		next_update = btree_trans_peek_updates(iter);
 		k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
 
-		/* * In the btree, deleted keys sort before non deleted: */
-		if (k.k && bkey_deleted(k.k) &&
-		    (!next_update ||
-		     bpos_cmp(k.k->p, next_update->k.p) <= 0)) {
-			search_key = k.k->p;
-			continue;
-		}
+		if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
+			k = btree_trans_peek_journal(trans, iter, k);
+
+		next_update = btree_trans_peek_updates(iter);
 
 		if (next_update &&
 		    bpos_cmp(next_update->k.p,
@@ -2159,6 +2279,20 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 			k = bkey_i_to_s_c(next_update);
 		}
 
+		if (k.k && bkey_deleted(k.k)) {
+			/*
+			 * If we've got a whiteout, and it's after the search
+			 * key, advance the search key to the whiteout instead
+			 * of just after the whiteout - it might be a btree
+			 * whiteout, with a real key at the same position, since
+			 * in the btree deleted keys sort before non deleted.
+			 */
+			search_key = bpos_cmp(search_key, k.k->p)
+				? k.k->p
+				: bpos_successor(k.k->p);
+			continue;
+		}
+
 		if (likely(k.k)) {
 			/*
 			 * We can never have a key in a leaf node at POS_MAX, so
@@ -2249,6 +2383,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
 
 	EBUG_ON(iter->path->cached || iter->path->level);
 	EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
+
+	if (iter->flags & BTREE_ITER_WITH_JOURNAL)
+		return bkey_s_c_err(-EIO);
+
 	bch2_btree_iter_verify(iter);
 	bch2_btree_iter_verify_entry_exit(iter);
 
@@ -2395,23 +2533,18 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 	    !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
 		struct bkey_i *next_update;
 
-		next_update = btree_trans_peek_updates(iter);
-		if (next_update &&
+		if ((next_update = btree_trans_peek_updates(iter)) &&
 		    !bpos_cmp(next_update->k.p, iter->pos)) {
 			iter->k = next_update->k;
 			k = bkey_i_to_s_c(next_update);
-		} else {
-			k = bch2_btree_path_peek_slot(iter->path, &iter->k);
+			goto out;
 		}
 
-		if (!k.k ||
-		    ((iter->flags & BTREE_ITER_ALL_SNAPSHOTS)
-		     ? bpos_cmp(iter->pos, k.k->p)
-		     : bkey_cmp(iter->pos, k.k->p))) {
-			bkey_init(&iter->k);
-			iter->k.p = iter->pos;
-			k = (struct bkey_s_c) { &iter->k, NULL };
-		}
+		if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) &&
+		    (k = btree_trans_peek_slot_journal(trans, iter)).k)
+			goto out;
+
+		k = bch2_btree_path_peek_slot(iter->path, &iter->k);
 	} else {
 		struct bpos next;
 
@@ -2455,7 +2588,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 			k = (struct bkey_s_c) { &iter->k, NULL };
 		}
 	}
-
+out:
 	iter->path->should_be_locked = true;
 
 	bch2_btree_iter_verify_entry_exit(iter);
@@ -2635,6 +2768,9 @@ static void __bch2_trans_iter_init(struct btree_trans *trans,
 	    btree_type_has_snapshots(btree_id))
 		flags |= BTREE_ITER_FILTER_SNAPSHOTS;
 
+	if (trans->journal_replay_not_finished)
+		flags |= BTREE_ITER_WITH_JOURNAL;
+
 	iter->trans	= trans;
 	iter->path	= NULL;
 	iter->btree_id	= btree_id;
@@ -2801,6 +2937,8 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
 	memset(trans, 0, sizeof(*trans));
 	trans->c		= c;
 	trans->ip		= _RET_IP_;
+	trans->journal_replay_not_finished =
+		!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
 
 	bch2_trans_alloc_paths(trans, c);
 
fs/bcachefs/btree_types.h
@@ -207,10 +207,11 @@ struct btree_node_iter {
 #define BTREE_ITER_CACHED_NOFILL	(1 << 8)
 #define BTREE_ITER_CACHED_NOCREATE	(1 << 9)
 #define BTREE_ITER_WITH_UPDATES		(1 << 10)
-#define __BTREE_ITER_ALL_SNAPSHOTS	(1 << 11)
-#define BTREE_ITER_ALL_SNAPSHOTS	(1 << 12)
-#define BTREE_ITER_FILTER_SNAPSHOTS	(1 << 13)
-#define BTREE_ITER_NOPRESERVE		(1 << 14)
+#define BTREE_ITER_WITH_JOURNAL		(1 << 11)
+#define __BTREE_ITER_ALL_SNAPSHOTS	(1 << 12)
+#define BTREE_ITER_ALL_SNAPSHOTS	(1 << 13)
+#define BTREE_ITER_FILTER_SNAPSHOTS	(1 << 14)
+#define BTREE_ITER_NOPRESERVE		(1 << 15)
 
 enum btree_path_uptodate {
 	BTREE_ITER_UPTODATE		= 0,
@@ -381,6 +382,7 @@ struct btree_trans {
 	bool			restarted:1;
 	bool			paths_sorted:1;
 	bool			journal_transaction_names:1;
+	bool			journal_replay_not_finished:1;
 	/*
 	 * For when bch2_trans_update notices we'll be splitting a compressed
 	 * extent:
fs/bcachefs/btree_update_interior.c
@@ -16,6 +16,7 @@
 #include "journal.h"
 #include "journal_reclaim.h"
 #include "keylist.h"
+#include "recovery.h"
 #include "replicas.h"
 #include "super-io.h"
 #include "trace.h"
@@ -1146,6 +1147,9 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
 	BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 &&
 	       !btree_ptr_sectors_written(insert));
 
+	if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
+		bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p);
+
 	invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b)) ?:
 		bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert));
 	if (invalid) {
fs/bcachefs/btree_update_leaf.c
@@ -711,7 +711,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
 
 	ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip);
 
-	if (!ret && unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
+	if (!ret && unlikely(trans->journal_replay_not_finished))
 		bch2_drop_overwrites_from_journal(trans);
 
 	trans_for_each_update(trans, i)
fs/bcachefs/ec.c
@@ -1558,50 +1558,48 @@ void bch2_stripes_heap_start(struct bch_fs *c)
 			bch2_stripes_heap_insert(c, m, iter.pos);
 }
 
-static int bch2_stripes_read_fn(struct btree_trans *trans, struct bkey_s_c k)
-{
-	const struct bch_stripe *s;
-	struct bch_fs *c = trans->c;
-	struct stripe *m;
-	unsigned i;
-	int ret = 0;
-
-	if (k.k->type != KEY_TYPE_stripe)
-		return 0;
-
-	ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
-	if (ret)
-		return ret;
-
-	s = bkey_s_c_to_stripe(k).v;
-
-	m = genradix_ptr(&c->stripes, k.k->p.offset);
-	m->alive	= true;
-	m->sectors	= le16_to_cpu(s->sectors);
-	m->algorithm	= s->algorithm;
-	m->nr_blocks	= s->nr_blocks;
-	m->nr_redundant	= s->nr_redundant;
-	m->blocks_nonempty = 0;
-
-	for (i = 0; i < s->nr_blocks; i++)
-		m->blocks_nonempty += !!stripe_blockcount_get(s, i);
-
-	spin_lock(&c->ec_stripes_heap_lock);
-	bch2_stripes_heap_update(c, m, k.k->p.offset);
-	spin_unlock(&c->ec_stripes_heap_lock);
-
-	return ret;
-}
-
 int bch2_stripes_read(struct bch_fs *c)
 {
 	struct btree_trans trans;
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	const struct bch_stripe *s;
+	struct stripe *m;
+	unsigned i;
 	int ret;
 
 	bch2_trans_init(&trans, c, 0, 0);
-	ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_stripes,
-					  bch2_stripes_read_fn);
+
+	for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
+			   BTREE_ITER_PREFETCH, k, ret) {
+		if (k.k->type != KEY_TYPE_stripe)
+			continue;
+
+		ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
+		if (ret)
+			break;
+
+		s = bkey_s_c_to_stripe(k).v;
+
+		m = genradix_ptr(&c->stripes, k.k->p.offset);
+		m->alive	= true;
+		m->sectors	= le16_to_cpu(s->sectors);
+		m->algorithm	= s->algorithm;
+		m->nr_blocks	= s->nr_blocks;
+		m->nr_redundant	= s->nr_redundant;
+		m->blocks_nonempty = 0;
+
+		for (i = 0; i < s->nr_blocks; i++)
+			m->blocks_nonempty += !!stripe_blockcount_get(s, i);
+
+		spin_lock(&c->ec_stripes_heap_lock);
+		bch2_stripes_heap_update(c, m, k.k->p.offset);
+		spin_unlock(&c->ec_stripes_heap_lock);
+	}
+	bch2_trans_iter_exit(&trans, &iter);
+
 	bch2_trans_exit(&trans);
 
 	if (ret)
 		bch_err(c, "error reading stripes: %i", ret);
 
					@ -59,23 +59,21 @@ static void zero_out_btree_mem_ptr(struct journal_keys *keys)
 | 
				
			||||||
static int __journal_key_cmp(enum btree_id	l_btree_id,
 | 
					static int __journal_key_cmp(enum btree_id	l_btree_id,
 | 
				
			||||||
			     unsigned		l_level,
 | 
								     unsigned		l_level,
 | 
				
			||||||
			     struct bpos	l_pos,
 | 
								     struct bpos	l_pos,
 | 
				
			||||||
			     struct journal_key *r)
 | 
								     const struct journal_key *r)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return (cmp_int(l_btree_id,	r->btree_id) ?:
 | 
						return (cmp_int(l_btree_id,	r->btree_id) ?:
 | 
				
			||||||
		cmp_int(l_level,	r->level) ?:
 | 
							cmp_int(l_level,	r->level) ?:
 | 
				
			||||||
		bpos_cmp(l_pos,	r->k->k.p));
 | 
							bpos_cmp(l_pos,	r->k->k.p));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int journal_key_cmp(struct journal_key *l, struct journal_key *r)
 | 
					static int journal_key_cmp(const struct journal_key *l, const struct journal_key *r)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return (cmp_int(l->btree_id,	r->btree_id) ?:
 | 
						return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r);
 | 
				
			||||||
		cmp_int(l->level,	r->level) ?:
 | 
					 | 
				
			||||||
		bpos_cmp(l->k->k.p,	r->k->k.p));
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static size_t journal_key_search(struct journal_keys *journal_keys,
 | 
					size_t bch2_journal_key_search(struct journal_keys *journal_keys,
 | 
				
			||||||
				 enum btree_id id, unsigned level,
 | 
								       enum btree_id id, unsigned level,
 | 
				
			||||||
				 struct bpos pos)
 | 
								       struct bpos pos)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	size_t l = 0, r = journal_keys->nr, m;
 | 
						size_t l = 0, r = journal_keys->nr, m;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -125,7 +123,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
 | 
				
			||||||
	};
 | 
						};
 | 
				
			||||||
	struct journal_keys *keys = &c->journal_keys;
 | 
						struct journal_keys *keys = &c->journal_keys;
 | 
				
			||||||
	struct journal_iter *iter;
 | 
						struct journal_iter *iter;
 | 
				
			||||||
	unsigned idx = journal_key_search(keys, id, level, k->k.p);
 | 
						size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	BUG_ON(test_bit(BCH_FS_RW, &c->flags));
 | 
						BUG_ON(test_bit(BCH_FS_RW, &c->flags));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -164,6 +162,11 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Can only be used from the recovery thread while we're still RO - can't be
 | 
				
			||||||
 | 
					 * used once we've got RW, as journal_keys is at that point used by multiple
 | 
				
			||||||
 | 
					 * threads:
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
 | 
					int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
 | 
				
			||||||
			    unsigned level, struct bkey_i *k)
 | 
								    unsigned level, struct bkey_i *k)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -196,7 +199,7 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
 | 
				
			||||||
				  unsigned level, struct bpos pos)
 | 
									  unsigned level, struct bpos pos)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct journal_keys *keys = &c->journal_keys;
 | 
						struct journal_keys *keys = &c->journal_keys;
 | 
				
			||||||
	size_t idx = journal_key_search(keys, btree, level, pos);
 | 
						size_t idx = bch2_journal_key_search(keys, btree, level, pos);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (idx < keys->nr &&
 | 
						if (idx < keys->nr &&
 | 
				
			||||||
	    keys->d[idx].btree_id	== btree &&
 | 
						    keys->d[idx].btree_id	== btree &&
 | 
				
			||||||
| 
						 | 
					@ -207,15 +210,18 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter)
 | 
					static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct journal_key *k = iter->idx - iter->keys->nr
 | 
						struct journal_key *k = iter->keys->d + iter->idx;
 | 
				
			||||||
		? iter->keys->d + iter->idx : NULL;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (k &&
 | 
						while (k < iter->keys->d + iter->keys->nr &&
 | 
				
			||||||
	    k->btree_id	== iter->btree_id &&
 | 
						       k->btree_id	== iter->btree_id &&
 | 
				
			||||||
	    k->level	== iter->level)
 | 
						       k->level		== iter->level) {
 | 
				
			||||||
		return k->k;
 | 
							if (!k->overwritten)
 | 
				
			||||||
 | 
								return k->k;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							iter->idx++;
 | 
				
			||||||
 | 
							k = iter->keys->d + iter->idx;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	iter->idx = iter->keys->nr;
 | 
					 | 
				
			||||||
	return NULL;
 | 
						return NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
@@ -238,8 +244,7 @@ static void bch2_journal_iter_init(struct bch_fs *c,
 	iter->btree_id	= id;
 	iter->level	= level;
 	iter->keys	= &c->journal_keys;
-	iter->idx	= journal_key_search(&c->journal_keys, id, level, pos);
-	list_add(&iter->list, &c->journal_iters);
+	iter->idx	= bch2_journal_key_search(&c->journal_keys, id, level, pos);
 }
 
 static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
@@ -325,106 +330,33 @@ void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *iter)
 	bch2_journal_iter_exit(&iter->journal);
 }
 
-void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
-					struct bch_fs *c,
-					struct btree *b)
+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
+						  struct bch_fs *c,
+						  struct btree *b,
+						  struct btree_node_iter node_iter,
+						  struct bpos pos)
 {
 	memset(iter, 0, sizeof(*iter));
 
 	iter->b = b;
-	bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b);
-	bch2_journal_iter_init(c, &iter->journal,
-			       b->c.btree_id, b->c.level, b->data->min_key);
+	iter->node_iter = node_iter;
+	bch2_journal_iter_init(c, &iter->journal, b->c.btree_id, b->c.level, pos);
+	INIT_LIST_HEAD(&iter->journal.list);
 }
 
-/* Walk btree, overlaying keys from the journal: */
-
-static void btree_and_journal_iter_prefetch(struct bch_fs *c, struct btree *b,
-					   struct btree_and_journal_iter iter)
+/*
+ * this version is used by btree_gc before filesystem has gone RW and
+ * multithreaded, so uses the journal_iters list:
+ */
+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
+						struct bch_fs *c,
+						struct btree *b)
 {
-	unsigned i = 0, nr = b->c.level > 1 ? 2 : 16;
-	struct bkey_s_c k;
-	struct bkey_buf tmp;
+	struct btree_node_iter node_iter;
 
-	BUG_ON(!b->c.level);
-
-	bch2_bkey_buf_init(&tmp);
-
-	while (i < nr &&
-	       (k = bch2_btree_and_journal_iter_peek(&iter)).k) {
-		bch2_bkey_buf_reassemble(&tmp, c, k);
-
-		bch2_btree_node_prefetch(c, NULL, NULL, tmp.k,
-					b->c.btree_id, b->c.level - 1);
-
-		bch2_btree_and_journal_iter_advance(&iter);
-		i++;
-	}
-
-	bch2_bkey_buf_exit(&tmp, c);
-}
-
-static int bch2_btree_and_journal_walk_recurse(struct btree_trans *trans, struct btree *b,
-				enum btree_id btree_id,
-				btree_walk_key_fn key_fn)
-{
-	struct bch_fs *c = trans->c;
-	struct btree_and_journal_iter iter;
-	struct bkey_s_c k;
-	struct bkey_buf tmp;
-	struct btree *child;
-	int ret = 0;
-
-	bch2_bkey_buf_init(&tmp);
-	bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
-
-	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
-		if (b->c.level) {
-			bch2_bkey_buf_reassemble(&tmp, c, k);
-
-			child = bch2_btree_node_get_noiter(c, tmp.k,
-						b->c.btree_id, b->c.level - 1,
-						false);
-
-			ret = PTR_ERR_OR_ZERO(child);
-			if (ret)
-				break;
-
-			btree_and_journal_iter_prefetch(c, b, iter);
-
-			ret = bch2_btree_and_journal_walk_recurse(trans, child,
-					btree_id, key_fn);
-			six_unlock_read(&child->c.lock);
-		} else {
-			ret = key_fn(trans, k);
-		}
-
-		if (ret)
-			break;
-
-		bch2_btree_and_journal_iter_advance(&iter);
-	}
-
-	bch2_btree_and_journal_iter_exit(&iter);
-	bch2_bkey_buf_exit(&tmp, c);
-	return ret;
-}
-
-int bch2_btree_and_journal_walk(struct btree_trans *trans, enum btree_id btree_id,
-				btree_walk_key_fn key_fn)
-{
-	struct bch_fs *c = trans->c;
-	struct btree *b = c->btree_roots[btree_id].b;
-	int ret = 0;
-
-	if (btree_node_fake(b))
-		return 0;
-
-	six_lock_read(&b->c.lock, NULL, NULL);
-	ret = bch2_btree_and_journal_walk_recurse(trans, b, btree_id, key_fn);
-	six_unlock_read(&b->c.lock);
-
-	return ret;
+	bch2_btree_node_iter_init_from_start(&node_iter, b);
+	__bch2_btree_and_journal_iter_init_node_iter(iter, c, b, node_iter, b->data->min_key);
+	list_add(&iter->journal.list, &c->journal_iters);
 }
 
 /* sort and dedup all keys in the journal: */
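Node-iterator setup is now split in two: __bch2_btree_and_journal_iter_init_node_iter() takes an explicit btree_node_iter and start position and leaves the journal iterator unlinked (INIT_LIST_HEAD), while bch2_btree_and_journal_iter_init_node_iter() becomes a wrapper that starts from the beginning of the node and still registers itself on c->journal_iters for the single-threaded btree_gc users named in the new comment. Callers of the wrapper keep the peek/advance/exit pattern that the deleted walker used; a rough sketch, assuming the bcachefs headers and an illustrative callback signature:

/* Sketch only: assumes the bcachefs tree; "recovery.h" stands in for the
 * header that declares the btree_and_journal_iter interface. */
#include "recovery.h"

static int walk_node_keys(struct bch_fs *c, struct btree *b,
			  int (*fn)(struct bch_fs *, struct bkey_s_c))
{
	struct btree_and_journal_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	/* wrapper: starts at b->data->min_key, links onto c->journal_iters */
	bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);

	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
		ret = fn(c, k);
		if (ret)
			break;

		bch2_btree_and_journal_iter_advance(&iter);
	}

	bch2_btree_and_journal_iter_exit(&iter);
	return ret;
}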
@@ -449,9 +381,7 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
 	const struct journal_key *l = _l;
 	const struct journal_key *r = _r;
 
-	return  cmp_int(l->btree_id,	r->btree_id) ?:
-		cmp_int(l->level,	r->level) ?:
-		bpos_cmp(l->k->k.p, r->k->k.p) ?:
+	return  journal_key_cmp(l, r) ?:
 		cmp_int(l->journal_seq, r->journal_seq) ?:
 		cmp_int(l->journal_offset, r->journal_offset);
 }
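journal_sort_key_cmp() now delegates the (btree_id, level, pos) part of the comparison to journal_key_cmp(), whose definition is outside this hunk. Judging only from the removed lines, it is presumably the factored-out prefix of the old comparator, something like:

/* Assumed shape, reconstructed from the comparisons it replaces here;
 * the real definition lives elsewhere in the file. */
static inline int journal_key_cmp(const struct journal_key *l,
				  const struct journal_key *r)
{
	return  cmp_int(l->btree_id,	r->btree_id) ?:
		cmp_int(l->level,	r->level) ?:
		bpos_cmp(l->k->k.p,	r->k->k.p);
}

The per-position ordering by journal_seq and then journal_offset is unchanged; only the position comparison is shared through the helper.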
@@ -31,6 +31,9 @@ struct btree_and_journal_iter {
 	}			last;
 };
 
+size_t bch2_journal_key_search(struct journal_keys *, enum btree_id,
+			       unsigned, struct bpos);
+
 int bch2_journal_key_insert_take(struct bch_fs *, enum btree_id,
 				 unsigned, struct bkey_i *);
 int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
@@ -45,14 +48,13 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *
 struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *);
 
 void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *);
+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
+				struct bch_fs *, struct btree *,
+				struct btree_node_iter, struct bpos);
 void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
 						struct bch_fs *,
 						struct btree *);
 
-typedef int (*btree_walk_key_fn)(struct btree_trans *, struct bkey_s_c);
-
-int bch2_btree_and_journal_walk(struct btree_trans *, enum btree_id, btree_walk_key_fn);
-
 void bch2_journal_keys_free(struct journal_keys *);
 void bch2_journal_entries_free(struct list_head *);
 