mirror of https://github.com/torvalds/linux.git, synced 2025-11-04 02:30:34 +02:00
	bcachefs: Delete old deadlock avoidance code
This deletes our old lock ordering based deadlock avoidance code.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>

parent 96d994b37c
commit 0d7009d7ca

9 changed files with 40 additions and 311 deletions

(The diff below shows the hunks from the 9 changed files in order; file names are not preserved in this mirror view.)
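
For context: the code this commit deletes (see the __bch2_btree_node_lock hunk below) enforced a fixed lock ordering (btree ID, cached vs. non-cached paths, node level, key order) and pre-emptively restarted any transaction whose next lock would violate that order, whether or not a deadlock was actually possible. What remains is the newer scheme: a blocked transaction registers as a waiter via six_lock_type_waiter() with bch2_six_check_for_deadlock as its callback, and bch2_check_for_deadlock() walks the wait-for graph, restarting a transaction only when a real cycle is found. The following is a minimal, self-contained userspace sketch of that cycle-detection idea; all toy_* names are invented for illustration and none of this is bcachefs code.

/*
 * Toy model of deadlock detection by wait-for-graph cycle search.
 * Each transaction here waits on at most one lock holder, so the
 * graph is a chain and Floyd's cycle-finding algorithm suffices.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_trans {
	int			id;
	struct toy_trans	*waiting_on;	/* holder of the lock we want */
};

static bool toy_check_for_deadlock(struct toy_trans *trans)
{
	struct toy_trans *slow = trans, *fast = trans;

	while (fast && fast->waiting_on) {
		slow = slow->waiting_on;
		fast = fast->waiting_on->waiting_on;
		if (slow == fast)
			return true;	/* cycle: restart one participant */
	}
	return false;
}

int main(void)
{
	struct toy_trans t[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };

	/* 0 waits on 1, 1 waits on 2, 2 waits on 0: a deadlock cycle */
	t[0].waiting_on = &t[1];
	t[1].waiting_on = &t[2];
	t[2].waiting_on = &t[0];

	printf("deadlock: %s\n", toy_check_for_deadlock(&t[0]) ? "yes" : "no");
	return 0;
}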

@@ -151,8 +151,6 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
 
 	/* Cause future lookups for this node to fail: */
 	b->hash_val = 0;
-
-	six_lock_wakeup_all(&b->c.lock);
 }
 
 int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)

@@ -755,16 +753,6 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
 	return b;
 }
 
-static int lock_node_check_fn(struct six_lock *lock, void *p)
-{
-	struct btree *b = container_of(lock, struct btree, c.lock);
-	const struct bkey_i *k = p;
-
-	if (b->hash_val != btree_ptr_hash_val(k))
-		return BCH_ERR_lock_fail_node_reused;
-	return 0;
-}
-
 static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
 {
 	struct printbuf buf = PRINTBUF;

@@ -886,15 +874,11 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
 		if (btree_node_read_locked(path, level + 1))
 			btree_node_unlock(trans, path, level + 1);
 
-		ret = btree_node_lock(trans, path, &b->c, k->k.p, level, lock_type,
-				      lock_node_check_fn, (void *) k, trace_ip);
-		if (unlikely(ret)) {
-			if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
-				goto retry;
-			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-				return ERR_PTR(ret);
-			BUG();
-		}
+		ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip);
+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+			return ERR_PTR(ret);
+
+		BUG_ON(ret);
 
 		if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
 			     b->c.level != level ||

@@ -1000,13 +984,10 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
 	} else {
 lock_node:
 		ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read);
-		if (unlikely(ret)) {
-			if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
-				goto retry;
-			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-				return ERR_PTR(ret);
-			BUG();
-		}
+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+			return ERR_PTR(ret);
+
+		BUG_ON(ret);
 
 		if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
 			     b->c.btree_id != btree_id ||

@@ -689,16 +689,6 @@ void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
 
 /* Btree path: traverse, set_pos: */
 
-static int lock_root_check_fn(struct six_lock *lock, void *p)
-{
-	struct btree *b = container_of(lock, struct btree, c.lock);
-	struct btree **rootp = p;
-
-	if (b != *rootp)
-		return BCH_ERR_lock_fail_root_changed;
-	return 0;
-}
-
 static inline int btree_path_lock_root(struct btree_trans *trans,
 				       struct btree_path *path,
 				       unsigned depth_want,

@@ -730,10 +720,8 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
 		}
 
 		lock_type = __btree_lock_want(path, path->level);
-		ret = btree_node_lock(trans, path, &b->c, SPOS_MAX,
-				      path->level, lock_type,
-				      lock_root_check_fn, rootp,
-				      trace_ip);
+		ret = btree_node_lock(trans, path, &b->c,
+				      path->level, lock_type, trace_ip);
 		if (unlikely(ret)) {
 			if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed))
 				continue;

@@ -939,7 +927,7 @@ static int btree_path_traverse_one(struct btree_trans *, struct btree_path *,
 static int bch2_btree_path_traverse_all(struct btree_trans *trans)
 {
 	struct bch_fs *c = trans->c;
-	struct btree_path *path, *prev;
+	struct btree_path *path;
 	unsigned long trace_ip = _RET_IP_;
 	int i, ret = 0;
 

@@ -948,7 +936,6 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans)
 
 	trans->in_traverse_all = true;
 retry_all:
-	prev = NULL;
 	trans->restarted = 0;
 
 	trans_for_each_path(trans, path)

@@ -956,18 +943,6 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans)
 
 	btree_trans_sort_paths(trans);
 
-	trans_for_each_path_inorder_reverse(trans, path, i) {
-		if (prev) {
-			if (path->btree_id == prev->btree_id &&
-			    path->locks_want < prev->locks_want)
-				__bch2_btree_path_upgrade(trans, path, prev->locks_want);
-			else if (!path->locks_want && prev->locks_want)
-				__bch2_btree_path_upgrade(trans, path, 1);
-		}
-
-		prev = path;
-	}
-
 	bch2_trans_unlock(trans);
 	cond_resched();
 

@@ -3026,16 +3001,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
 
 	b = READ_ONCE(trans->locking);
 	if (b) {
-		path = &trans->paths[trans->locking_path_idx];
-		prt_printf(out, "  locking path %u %c l=%u %c %s:",
-		       trans->locking_path_idx,
-		       path->cached ? 'c' : 'b',
-		       trans->locking_level,
-		       lock_types[trans->locking_wait.lock_want],
-		       bch2_btree_ids[trans->locking_btree_id]);
-		bch2_bpos_to_text(out, trans->locking_pos);
-
-		prt_printf(out, " node ");
+		prt_printf(out, " locking node ");
 		bch2_btree_path_node_to_text(out, b);
 		prt_printf(out, "\n");
 	}

@@ -398,17 +398,6 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 	return ret;
 }
 
-static int bkey_cached_check_fn(struct six_lock *lock, void *p)
-{
-	struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock);
-	const struct btree_path *path = p;
-
-	if (ck->key.btree_id != path->btree_id &&
-	    bpos_cmp(ck->key.pos, path->pos))
-		return BCH_ERR_lock_fail_node_reused;
-	return 0;
-}
-
 __flatten
 int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path,
 				    unsigned flags)

@@ -440,16 +429,12 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path
 	} else {
 		enum six_lock_type lock_want = __btree_lock_want(path, 0);
 
-		ret = btree_node_lock(trans, path, (void *) ck, path->pos, 0,
-				      lock_want,
-				      bkey_cached_check_fn, path, _THIS_IP_);
-		if (ret) {
-			if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
-				goto retry;
-			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-				goto err;
-			BUG();
-		}
+		ret = btree_node_lock(trans, path, (void *) ck, 0,
+				      lock_want, _THIS_IP_);
+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+			goto err;
+
+		BUG_ON(ret);
 
 		if (ck->key.btree_id != path->btree_id ||
 		    bpos_cmp(ck->key.pos, path->pos)) {

@@ -92,6 +92,7 @@ static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
 	int ret;
 
 	if (i == g->g) {
+		trace_and_count(i->trans->c, trans_restart_would_deadlock, i->trans, _RET_IP_);
 		ret = btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
 	} else {
 		i->trans->lock_must_abort = true;

@@ -216,8 +217,10 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
 	struct btree_path *path;
 	int ret;
 
-	if (trans->lock_must_abort)
+	if (trans->lock_must_abort) {
+		trace_and_count(trans->c, trans_restart_would_deadlock, trans, _RET_IP_);
 		return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
+	}
 
 	g.nr = 0;
 	ret = lock_graph_descend(&g, trans, cycle);

@@ -294,7 +297,7 @@ int bch2_six_check_for_deadlock(struct six_lock *lock, void *p)
 	return bch2_check_for_deadlock(trans, NULL);
 }
 
-int __bch2_btree_node_lock_write(struct btree_trans *trans,
+int __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree_path *path,
 				 struct btree_bkey_cached_common *b,
 				 bool lock_may_not_fail)
 {

@@ -311,99 +314,12 @@ int __bch2_btree_node_lock_write(struct btree_trans *trans,
 	ret = __btree_node_lock_nopath(trans, b, SIX_LOCK_write, lock_may_not_fail);
 	six_lock_readers_add(&b->lock, readers);
 
+	if (ret)
+		mark_btree_node_locked_noreset(path, b->level, SIX_LOCK_intent);
+
 	return ret;
 }
 
-static inline bool path_has_read_locks(struct btree_path *path)
-{
-	unsigned l;
-
-	for (l = 0; l < BTREE_MAX_DEPTH; l++)
-		if (btree_node_read_locked(path, l))
-			return true;
-	return false;
-}
-
-/* Slowpath: */
-int __bch2_btree_node_lock(struct btree_trans *trans,
-			   struct btree_path *path,
-			   struct btree_bkey_cached_common *b,
-			   struct bpos pos, unsigned level,
-			   enum six_lock_type type,
-			   six_lock_should_sleep_fn should_sleep_fn, void *p,
-			   unsigned long ip)
-{
-	struct btree_path *linked;
-	unsigned reason;
-
-	/* Check if it's safe to block: */
-	trans_for_each_path(trans, linked) {
-		if (!linked->nodes_locked)
-			continue;
-
-		/*
-		 * Can't block taking an intent lock if we have _any_ nodes read
-		 * locked:
-		 *
-		 * - Our read lock blocks another thread with an intent lock on
-		 *   the same node from getting a write lock, and thus from
-		 *   dropping its intent lock
-		 *
-		 * - And the other thread may have multiple nodes intent locked:
-		 *   both the node we want to intent lock, and the node we
-		 *   already have read locked - deadlock:
-		 */
-		if (type == SIX_LOCK_intent &&
-		    path_has_read_locks(linked)) {
-			reason = 1;
-			goto deadlock;
-		}
-
-		if (linked->btree_id != path->btree_id) {
-			if (linked->btree_id < path->btree_id)
-				continue;
-
-			reason = 3;
-			goto deadlock;
-		}
-
-		/*
-		 * Within the same btree, non-cached paths come before cached
-		 * paths:
-		 */
-		if (linked->cached != path->cached) {
-			if (!linked->cached)
-				continue;
-
-			reason = 4;
-			goto deadlock;
-		}
-
-		/*
-		 * Interior nodes must be locked before their descendants: if
-		 * another path has possible descendants locked of the node
-		 * we're about to lock, it must have the ancestors locked too:
-		 */
-		if (level > btree_path_highest_level_locked(linked)) {
-			reason = 5;
-			goto deadlock;
-		}
-
-		/* Must lock btree nodes in key order: */
-		if (btree_node_locked(linked, level) &&
-		    bpos_cmp(pos, btree_node_pos(&linked->l[level].b->c)) <= 0) {
-			reason = 7;
-			goto deadlock;
-		}
-	}
-
-	return btree_node_lock_type(trans, path, b, pos, level,
-				    type, should_sleep_fn, p);
-deadlock:
-	trace_and_count(trans->c, trans_restart_would_deadlock, trans, ip, reason, linked, path, &pos);
-	return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
-}
-
 /* relock */
 
 static inline bool btree_path_get_locks(struct btree_trans *trans,

@@ -195,8 +195,8 @@ static inline int __btree_node_lock_nopath(struct btree_trans *trans,
 	int ret;
 
 	trans->lock_may_not_fail = lock_may_not_fail;
-	trans->locking		= b;
 	trans->lock_must_abort	= false;
+	trans->locking		= b;
 
 	ret = six_lock_type_waiter(&b->lock, type, &trans->locking_wait,
 				   bch2_six_check_for_deadlock, trans);

@@ -222,26 +222,6 @@ static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans,
 	BUG_ON(ret);
 }
 
-static inline int btree_node_lock_type(struct btree_trans *trans,
-				       struct btree_path *path,
-				       struct btree_bkey_cached_common *b,
-				       struct bpos pos, unsigned level,
-				       enum six_lock_type type,
-				       six_lock_should_sleep_fn should_sleep_fn, void *p)
-{
-	if (six_trylock_type(&b->lock, type))
-		return 0;
-
-	trans->locking_path_idx = path->idx;
-	trans->locking_pos	= pos;
-	trans->locking_btree_id	= path->btree_id;
-	trans->locking_level	= level;
-	trans->lock_may_not_fail = false;
-	trans->locking		= b;
-	return six_lock_type_waiter(&b->lock, type, &trans->locking_wait,
-				    bch2_six_check_for_deadlock, trans);
-}
-
 /*
  * Lock a btree node if we already have it locked on one of our linked
  * iterators:

@@ -263,19 +243,11 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans,
 	return false;
 }
 
-int __bch2_btree_node_lock(struct btree_trans *, struct btree_path *,
-			   struct btree_bkey_cached_common *,
-			   struct bpos, unsigned,
-			   enum six_lock_type,
-			   six_lock_should_sleep_fn, void *,
-			   unsigned long);
-
 static inline int btree_node_lock(struct btree_trans *trans,
 			struct btree_path *path,
 			struct btree_bkey_cached_common *b,
-			struct bpos pos, unsigned level,
+			unsigned level,
 			enum six_lock_type type,
-			six_lock_should_sleep_fn should_sleep_fn, void *p,
 			unsigned long ip)
 {
 	int ret = 0;

@@ -285,8 +257,7 @@ static inline int btree_node_lock(struct btree_trans *trans,
 
 	if (likely(six_trylock_type(&b->lock, type)) ||
 	    btree_node_lock_increment(trans, b, level, type) ||
-	    !(ret = __bch2_btree_node_lock(trans, path, b, pos, level, type,
-					   should_sleep_fn, p, ip))) {
+	    !(ret = btree_node_lock_nopath(trans, b, type))) {
 #ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
 		path->l[b->level].lock_taken_time = ktime_get_ns();
 #endif
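
With the ordering checks gone, btree_node_lock() above no longer needs a position or a should-sleep callback: it tries the lock, checks whether another of our paths already holds it (btree_node_lock_increment()), and otherwise takes the plain blocking path, where the cycle detector runs from the waiter. A rough pthreads analogy of that try-then-block shape (illustrative only; a pthread mutex stands in for a six lock):

#include <pthread.h>

/* Try-then-block: the shape btree_node_lock() reduces to in this commit.
 * With a plain mutex the blocking step just sleeps; in bcachefs it is
 * the nopath lock, whose waiter invokes the deadlock cycle detector. */
static int toy_node_lock(pthread_mutex_t *lock)
{
	if (pthread_mutex_trylock(lock) == 0)
		return 0;			/* uncontended fast path */
	return pthread_mutex_lock(lock);	/* contended: block */
}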

@@ -295,15 +266,14 @@ static inline int btree_node_lock(struct btree_trans *trans,
 	return ret;
 }
 
-int __bch2_btree_node_lock_write(struct btree_trans *, struct btree_bkey_cached_common *, bool);
+int __bch2_btree_node_lock_write(struct btree_trans *, struct btree_path *,
+				 struct btree_bkey_cached_common *b, bool);
 
 static inline int __btree_node_lock_write(struct btree_trans *trans,
 					  struct btree_path *path,
 					  struct btree_bkey_cached_common *b,
 					  bool lock_may_not_fail)
 {
-	int ret;
-
 	EBUG_ON(&path->l[b->level].b->c != b);
 	EBUG_ON(path->l[b->level].lock_seq != b->lock.state.seq);
 	EBUG_ON(!btree_node_intent_locked(path, b->level));

@@ -315,13 +285,9 @@ static inline int __btree_node_lock_write(struct btree_trans *trans,
 	 */
 	mark_btree_node_locked_noreset(path, b->level, SIX_LOCK_write);
 
-	ret = likely(six_trylock_write(&b->lock))
+	return likely(six_trylock_write(&b->lock))
 		? 0
-		: __bch2_btree_node_lock_write(trans, b, lock_may_not_fail);
-	if (ret)
-		mark_btree_node_locked_noreset(path, b->level, SIX_LOCK_intent);
-
-	return ret;
+		: __bch2_btree_node_lock_write(trans, path, b, lock_may_not_fail);
 }
 
 static inline void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,

@@ -391,10 +391,6 @@ struct btree_trans {
 	struct list_head	list;
 	u64			last_begin_time;
 
-	unsigned		locking_path_idx;
-	struct bpos		locking_pos;
-	u8			locking_btree_id;
-	u8			locking_level;
 	u8			lock_may_not_fail;
 	u8			lock_must_abort;
 	struct btree_bkey_cached_common *locking;

@@ -796,23 +796,6 @@ static inline void normalize_read_intent_locks(struct btree_trans *trans)
 	bch2_trans_verify_locks(trans);
 }
 
-static inline bool have_conflicting_read_lock(struct btree_trans *trans, struct btree_path *pos)
-{
-	struct btree_path *path;
-	unsigned i;
-
-	trans_for_each_path_inorder(trans, path, i) {
-		//if (path == pos)
-		//	break;
-
-		if (btree_node_read_locked(path, path->level) &&
-		    !bch2_btree_path_upgrade_noupgrade_sibs(trans, path, path->level + 1))
-			return true;
-	}
-
-	return false;
-}
-
 static inline int trans_lock_write(struct btree_trans *trans)
 {
 	struct btree_insert_entry *i;

@@ -822,31 +805,15 @@ static inline int trans_lock_write(struct btree_trans *trans)
 		if (same_leaf_as_prev(trans, i))
 			continue;
 
-		/*
-		 * six locks are unfair, and read locks block while a thread
-		 * wants a write lock: thus, we need to tell the cycle detector
-		 * we have a write lock _before_ taking the lock:
-		 */
-		mark_btree_node_locked_noreset(i->path, i->level, SIX_LOCK_write);
-
-		if (!six_trylock_write(&insert_l(i)->b->c.lock)) {
-			if (have_conflicting_read_lock(trans, i->path))
-				goto fail;
-
-			ret = btree_node_lock_type(trans, i->path,
-					     &insert_l(i)->b->c,
-					     i->path->pos, i->level,
-					     SIX_LOCK_write, NULL, NULL);
-			BUG_ON(ret);
-		}
+		ret = bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c);
+		if (ret)
+			goto fail;
 
 		bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
 	}
 
 	return 0;
 fail:
-	mark_btree_node_locked_noreset(i->path, i->level, SIX_LOCK_intent);
-
 	while (--i >= trans->updates) {
 		if (same_leaf_as_prev(trans, i))
 			continue;
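
trans_lock_write() above keeps its take-in-order, unwind-on-failure shape, but the conflict heuristics (have_conflicting_read_lock() and the manual btree_node_lock_type() fallback) are gone: failure is now simply the lock call returning an error. A self-contained sketch of that acquire-all-or-unwind pattern (toy_* names invented; the real loop walks trans->updates):

#include <pthread.h>

/* Take every lock in order; on failure, drop the ones already held
 * and return the error, mirroring trans_lock_write()'s fail: path. */
static int toy_lock_all(pthread_mutex_t **locks, int n)
{
	int i, ret = 0;

	for (i = 0; i < n; i++) {
		ret = pthread_mutex_trylock(locks[i]);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	while (--i >= 0)
		pthread_mutex_unlock(locks[i]);
	return ret;
}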

@@ -52,7 +52,6 @@
 	x(BCH_ERR_no_btree_node,	no_btree_node_down)			\
 	x(BCH_ERR_no_btree_node,	no_btree_node_init)			\
 	x(BCH_ERR_no_btree_node,	no_btree_node_cached)			\
-	x(0,				lock_fail_node_reused)			\
 	x(0,				lock_fail_root_changed)			\
 	x(0,				journal_reclaim_would_deadlock)		\
 	x(0,				fsck)					\

@@ -1012,57 +1012,10 @@ DEFINE_EVENT(transaction_restart_iter,	trans_restart_memory_allocation_failure,
 	TP_ARGS(trans, caller_ip, path)
 );
 
-TRACE_EVENT(trans_restart_would_deadlock,
+DEFINE_EVENT(transaction_event,	trans_restart_would_deadlock,
 	TP_PROTO(struct btree_trans *trans,
-		 unsigned long	caller_ip,
-		 unsigned	reason,
-		 struct btree_path *have,
-		 struct btree_path *want,
-		 struct bpos	*want_pos),
-	TP_ARGS(trans, caller_ip, reason,
-		have, want, want_pos),
-
-	TP_STRUCT__entry(
-		__array(char,			trans_fn, 32	)
-		__field(unsigned long,		caller_ip	)
-		__field(u8,			in_traverse_all	)
-		__field(u8,			reason		)
-		__field(u8,			have_btree_id	)
-		__field(u8,			have_type	)
-		__field(u8,			want_btree_id	)
-		__field(u8,			want_type	)
-		TRACE_BPOS_entries(have_pos)
-		TRACE_BPOS_entries(want_pos)
-	),
-
-	TP_fast_assign(
-		strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
-		__entry->caller_ip		= caller_ip;
-		__entry->in_traverse_all	= trans->in_traverse_all;
-		__entry->reason			= reason;
-		__entry->have_btree_id		= have->btree_id;
-		__entry->have_type		= have->cached;
-		__entry->want_btree_id		= want->btree_id;
-		__entry->want_type		= want->cached;
-		TRACE_BPOS_assign(have_pos, have->pos);
-		TRACE_BPOS_assign(want_pos, *want_pos);
-	),
-
-	TP_printk("%s %pS traverse_all %u because %u have %u:%u %llu:%llu:%u want %u:%u %llu:%llu:%u",
-		  __entry->trans_fn,
-		  (void *) __entry->caller_ip,
-		  __entry->in_traverse_all,
-		  __entry->reason,
-		  __entry->have_btree_id,
-		  __entry->have_type,
-		  __entry->have_pos_inode,
-		  __entry->have_pos_offset,
-		  __entry->have_pos_snapshot,
-		  __entry->want_btree_id,
-		  __entry->want_type,
-		  __entry->want_pos_inode,
-		  __entry->want_pos_offset,
-		  __entry->want_pos_snapshot)
+		 unsigned long caller_ip),
+	TP_ARGS(trans, caller_ip)
 );
 
 DEFINE_EVENT(transaction_event,	trans_restart_would_deadlock_recursion_limit,