mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 16:48:26 +02:00 
			
		
		
		
	bcachefs: Fix needs_whiteout BUG_ON() in bkey_sort()
Btree nodes are log structured; thus, we need to emit whiteouts when we're deleting a key that's been written out to disk. k->needs_whiteout tracks whether a key will need a whiteout when it's deleted, and this requires some careful handling; e.g. the key we're deleting may not have been written out to disk, but it may have overwritten a key that was - thus we need to carry this flag around on overwrites. Invariants: There may be multiple key for the same position in a given node (because of overwrites), but only one of them will be a live (non deleted) key, and only one key for a given position will have the needs_whiteout flag set. Additionally, we don't want to carry around whiteouts that need to be written in the main searchable part of a btree node - btree_iter_peek() will have to skip past them, and this can lead to an O(n^2) issues when doing sequential deletions (e.g. inode rm/truncate). So there's a separate region in the btree node buffer for unwritten whiteouts; these are merge sorted with the rest of the keys we're writing in the btree node write path. The unwritten whiteouts was a later optimization that bch2_sort_keys() didn't take into account; the unwritten whiteouts area means that we never have deleted keys with needs_whiteout set in the main searchable part of a btree node. That means we can simplify and optimize some sort paths, and eliminate an assertion that syzbot found: - Unless we're in the btree node write path, it's always ok to drop whiteouts when sorting - When sorting for a btree node write, we drop the whiteout if it's not from the unwritten whiteouts area, or if it's overwritten by a real key at the same position. This completely eliminates some tricky logic for propagating the needs_whiteout flag: syzbot was able to hit the assertion that checked that there shouldn't be more than one key at the same pos with needs_whiteout set, likely due to a combination of flipping on needs_whiteout on all written keys (they need whiteouts if overwritten), combined with not always dropping unneeded whiteouts, and the tricky logic in the sort path for preserving needs_whiteout that wasn't really needed. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
		
							parent
							
								
									5ad1f33c29
								
							
						
					
					
						commit
						5dfd3746b6
					
				
					 3 changed files with 57 additions and 46 deletions
				
			
		|  | @ -6,9 +6,9 @@ | |||
| #include "bset.h" | ||||
| #include "extents.h" | ||||
| 
 | ||||
| typedef int (*sort_cmp_fn)(struct btree *, | ||||
| 			   struct bkey_packed *, | ||||
| 			   struct bkey_packed *); | ||||
| typedef int (*sort_cmp_fn)(const struct btree *, | ||||
| 			   const struct bkey_packed *, | ||||
| 			   const struct bkey_packed *); | ||||
| 
 | ||||
| static inline bool sort_iter_end(struct sort_iter *iter) | ||||
| { | ||||
|  | @ -70,9 +70,9 @@ static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter, | |||
| /*
 | ||||
|  * If keys compare equal, compare by pointer order: | ||||
|  */ | ||||
| static inline int key_sort_fix_overlapping_cmp(struct btree *b, | ||||
| 					       struct bkey_packed *l, | ||||
| 					       struct bkey_packed *r) | ||||
| static inline int key_sort_fix_overlapping_cmp(const struct btree *b, | ||||
| 					       const struct bkey_packed *l, | ||||
| 					       const struct bkey_packed *r) | ||||
| { | ||||
| 	return bch2_bkey_cmp_packed(b, l, r) ?: | ||||
| 		cmp_int((unsigned long) l, (unsigned long) r); | ||||
|  | @ -154,46 +154,59 @@ bch2_sort_repack(struct bset *dst, struct btree *src, | |||
| 	return nr; | ||||
| } | ||||
| 
 | ||||
| static inline int sort_keys_cmp(struct btree *b, | ||||
| 				struct bkey_packed *l, | ||||
| 				struct bkey_packed *r) | ||||
| static inline int keep_unwritten_whiteouts_cmp(const struct btree *b, | ||||
| 				const struct bkey_packed *l, | ||||
| 				const struct bkey_packed *r) | ||||
| { | ||||
| 	return bch2_bkey_cmp_packed_inlined(b, l, r) ?: | ||||
| 		(int) bkey_deleted(r) - (int) bkey_deleted(l) ?: | ||||
| 		(int) l->needs_whiteout - (int) r->needs_whiteout; | ||||
| 		(long) l - (long) r; | ||||
| } | ||||
| 
 | ||||
| unsigned bch2_sort_keys(struct bkey_packed *dst, | ||||
| 			struct sort_iter *iter, | ||||
| 			bool filter_whiteouts) | ||||
| #include "btree_update_interior.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * For sorting in the btree node write path: whiteouts not in the unwritten | ||||
|  * whiteouts area are dropped, whiteouts in the unwritten whiteouts area are | ||||
|  * dropped if overwritten by real keys: | ||||
|  */ | ||||
| unsigned bch2_sort_keys_keep_unwritten_whiteouts(struct bkey_packed *dst, struct sort_iter *iter) | ||||
| { | ||||
| 	const struct bkey_format *f = &iter->b->format; | ||||
| 	struct bkey_packed *in, *next, *out = dst; | ||||
| 
 | ||||
| 	sort_iter_sort(iter, sort_keys_cmp); | ||||
| 	sort_iter_sort(iter, keep_unwritten_whiteouts_cmp); | ||||
| 
 | ||||
| 	while ((in = sort_iter_next(iter, sort_keys_cmp))) { | ||||
| 		bool needs_whiteout = false; | ||||
| 
 | ||||
| 		if (bkey_deleted(in) && | ||||
| 		    (filter_whiteouts || !in->needs_whiteout)) | ||||
| 	while ((in = sort_iter_next(iter, keep_unwritten_whiteouts_cmp))) { | ||||
| 		if (bkey_deleted(in) && in < unwritten_whiteouts_start(iter->b)) | ||||
| 			continue; | ||||
| 
 | ||||
| 		while ((next = sort_iter_peek(iter)) && | ||||
| 		       !bch2_bkey_cmp_packed_inlined(iter->b, in, next)) { | ||||
| 			BUG_ON(in->needs_whiteout && | ||||
| 			       next->needs_whiteout); | ||||
| 			needs_whiteout |= in->needs_whiteout; | ||||
| 			in = sort_iter_next(iter, sort_keys_cmp); | ||||
| 		} | ||||
| 		if ((next = sort_iter_peek(iter)) && | ||||
| 		    !bch2_bkey_cmp_packed_inlined(iter->b, in, next)) | ||||
| 			continue; | ||||
| 
 | ||||
| 		if (bkey_deleted(in)) { | ||||
| 			memcpy_u64s_small(out, in, bkeyp_key_u64s(f, in)); | ||||
| 			set_bkeyp_val_u64s(f, out, 0); | ||||
| 		} else { | ||||
| 			bkey_p_copy(out, in); | ||||
| 		} | ||||
| 		out->needs_whiteout |= needs_whiteout; | ||||
| 		bkey_p_copy(out, in); | ||||
| 		out = bkey_p_next(out); | ||||
| 	} | ||||
| 
 | ||||
| 	return (u64 *) out - (u64 *) dst; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Main sort routine for compacting a btree node in memory: we always drop | ||||
|  * whiteouts because any whiteouts that need to be written are in the unwritten | ||||
|  * whiteouts area: | ||||
|  */ | ||||
| unsigned bch2_sort_keys(struct bkey_packed *dst, struct sort_iter *iter) | ||||
| { | ||||
| 	struct bkey_packed *in, *out = dst; | ||||
| 
 | ||||
| 	sort_iter_sort(iter, bch2_bkey_cmp_packed_inlined); | ||||
| 
 | ||||
| 	while ((in = sort_iter_next(iter, bch2_bkey_cmp_packed_inlined))) { | ||||
| 		if (bkey_deleted(in)) | ||||
| 			continue; | ||||
| 
 | ||||
| 		bkey_p_copy(out, in); | ||||
| 		out = bkey_p_next(out); | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -48,7 +48,7 @@ bch2_sort_repack(struct bset *, struct btree *, | |||
| 		 struct btree_node_iter *, | ||||
| 		 struct bkey_format *, bool); | ||||
| 
 | ||||
| unsigned bch2_sort_keys(struct bkey_packed *, | ||||
| 			struct sort_iter *, bool); | ||||
| unsigned bch2_sort_keys_keep_unwritten_whiteouts(struct bkey_packed *, struct sort_iter *); | ||||
| unsigned bch2_sort_keys(struct bkey_packed *, struct sort_iter *); | ||||
| 
 | ||||
| #endif /* _BCACHEFS_BKEY_SORT_H */ | ||||
|  |  | |||
|  | @ -288,8 +288,7 @@ bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, | |||
| 
 | ||||
| static void btree_node_sort(struct bch_fs *c, struct btree *b, | ||||
| 			    unsigned start_idx, | ||||
| 			    unsigned end_idx, | ||||
| 			    bool filter_whiteouts) | ||||
| 			    unsigned end_idx) | ||||
| { | ||||
| 	struct btree_node *out; | ||||
| 	struct sort_iter_stack sort_iter; | ||||
|  | @ -320,7 +319,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b, | |||
| 
 | ||||
| 	start_time = local_clock(); | ||||
| 
 | ||||
| 	u64s = bch2_sort_keys(out->keys.start, &sort_iter.iter, filter_whiteouts); | ||||
| 	u64s = bch2_sort_keys(out->keys.start, &sort_iter.iter); | ||||
| 
 | ||||
| 	out->keys.u64s = cpu_to_le16(u64s); | ||||
| 
 | ||||
|  | @ -426,13 +425,12 @@ static bool btree_node_compact(struct bch_fs *c, struct btree *b) | |||
| 			break; | ||||
| 
 | ||||
| 	if (b->nsets - unwritten_idx > 1) { | ||||
| 		btree_node_sort(c, b, unwritten_idx, | ||||
| 				b->nsets, false); | ||||
| 		btree_node_sort(c, b, unwritten_idx, b->nsets); | ||||
| 		ret = true; | ||||
| 	} | ||||
| 
 | ||||
| 	if (unwritten_idx > 1) { | ||||
| 		btree_node_sort(c, b, 0, unwritten_idx, false); | ||||
| 		btree_node_sort(c, b, 0, unwritten_idx); | ||||
| 		ret = true; | ||||
| 	} | ||||
| 
 | ||||
|  | @ -2095,11 +2093,11 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags) | |||
| 		      unwritten_whiteouts_end(b)); | ||||
| 	SET_BSET_SEPARATE_WHITEOUTS(i, false); | ||||
| 
 | ||||
| 	b->whiteout_u64s = 0; | ||||
| 
 | ||||
| 	u64s = bch2_sort_keys(i->start, &sort_iter.iter, false); | ||||
| 	u64s = bch2_sort_keys_keep_unwritten_whiteouts(i->start, &sort_iter.iter); | ||||
| 	le16_add_cpu(&i->u64s, u64s); | ||||
| 
 | ||||
| 	b->whiteout_u64s = 0; | ||||
| 
 | ||||
| 	BUG_ON(!b->written && i->u64s != b->data->keys.u64s); | ||||
| 
 | ||||
| 	set_needs_whiteout(i, false); | ||||
|  | @ -2249,7 +2247,7 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b) | |||
| 	 * single bset: | ||||
| 	 */ | ||||
| 	if (b->nsets > 1) { | ||||
| 		btree_node_sort(c, b, 0, b->nsets, true); | ||||
| 		btree_node_sort(c, b, 0, b->nsets); | ||||
| 		invalidated_iter = true; | ||||
| 	} else { | ||||
| 		invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL); | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Kent Overstreet
						Kent Overstreet