bcachefs: Journal read error message improvements

- Don't print a checksum error when we first read a journal entry: we
  print a checksum error later if we'll be using the journal entry.

- Continuing with the theme of of improving error messages and grouping
  errors into a single log message per error, print a single 'checksum
  error' message per journal entry, and use bch2_journal_ptr_to_text()
  to print out where on the device it was.

- Factor out checksum error messages and checking for missing journal
  entries into helpers, bch2_journal_read() has gotten obnoxiously big.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2025-05-26 12:21:57 -04:00
parent 72ab5136e8
commit cd04497b10
2 changed files with 141 additions and 90 deletions

View file

@ -49,14 +49,31 @@ void bch2_journal_pos_from_member_info_resume(struct bch_fs *c)
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
} }
void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c, static void bch2_journal_ptr_to_text(struct printbuf *out, struct bch_fs *c, struct journal_ptr *p)
struct journal_replay *j) {
struct bch_dev *ca = bch2_dev_tryget_noerror(c, p->dev);
prt_printf(out, "%s %u:%u:%u (sector %llu)",
ca ? ca->name : "(invalid dev)",
p->dev, p->bucket, p->bucket_offset, p->sector);
bch2_dev_put(ca);
}
void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c, struct journal_replay *j)
{ {
darray_for_each(j->ptrs, i) { darray_for_each(j->ptrs, i) {
if (i != j->ptrs.data) if (i != j->ptrs.data)
prt_printf(out, " "); prt_printf(out, " ");
prt_printf(out, "%u:%u:%u (sector %llu)", bch2_journal_ptr_to_text(out, c, i);
i->dev, i->bucket, i->bucket_offset, i->sector); }
}
static void bch2_journal_datetime_to_text(struct printbuf *out, struct jset *j)
{
for_each_jset_entry_type(entry, j, BCH_JSET_ENTRY_datetime) {
struct jset_entry_datetime *datetime =
container_of(entry, struct jset_entry_datetime, entry);
bch2_prt_datetime(out, le64_to_cpu(datetime->seconds));
break;
} }
} }
@ -64,15 +81,9 @@ static void bch2_journal_replay_to_text(struct printbuf *out, struct bch_fs *c,
struct journal_replay *j) struct journal_replay *j)
{ {
prt_printf(out, "seq %llu ", le64_to_cpu(j->j.seq)); prt_printf(out, "seq %llu ", le64_to_cpu(j->j.seq));
bch2_journal_datetime_to_text(out, &j->j);
prt_char(out, ' ');
bch2_journal_ptrs_to_text(out, c, j); bch2_journal_ptrs_to_text(out, c, j);
for_each_jset_entry_type(entry, &j->j, BCH_JSET_ENTRY_datetime) {
struct jset_entry_datetime *datetime =
container_of(entry, struct jset_entry_datetime, entry);
bch2_prt_datetime(out, le64_to_cpu(datetime->seconds));
break;
}
} }
static struct nonce journal_nonce(const struct jset *jset) static struct nonce journal_nonce(const struct jset *jset)
@ -1037,7 +1048,6 @@ static int journal_read_bucket(struct bch_dev *ca,
u64 offset = bucket_to_sector(ca, ja->buckets[bucket]), u64 offset = bucket_to_sector(ca, ja->buckets[bucket]),
end = offset + ca->mi.bucket_size; end = offset + ca->mi.bucket_size;
bool saw_bad = false, csum_good; bool saw_bad = false, csum_good;
struct printbuf err = PRINTBUF;
int ret = 0; int ret = 0;
pr_debug("reading %u", bucket); pr_debug("reading %u", bucket);
@ -1078,7 +1088,7 @@ static int journal_read_bucket(struct bch_dev *ca,
* found on a different device, and missing or * found on a different device, and missing or
* no journal entries will be handled later * no journal entries will be handled later
*/ */
goto out; return 0;
} }
j = buf->data; j = buf->data;
@ -1095,12 +1105,12 @@ static int journal_read_bucket(struct bch_dev *ca,
ret = journal_read_buf_realloc(buf, ret = journal_read_buf_realloc(buf,
vstruct_bytes(j)); vstruct_bytes(j));
if (ret) if (ret)
goto err; return ret;
} }
goto reread; goto reread;
case JOURNAL_ENTRY_NONE: case JOURNAL_ENTRY_NONE:
if (!saw_bad) if (!saw_bad)
goto out; return 0;
/* /*
* On checksum error we don't really trust the size * On checksum error we don't really trust the size
* field of the journal entry we read, so try reading * field of the journal entry we read, so try reading
@ -1109,7 +1119,7 @@ static int journal_read_bucket(struct bch_dev *ca,
sectors = block_sectors(c); sectors = block_sectors(c);
goto next_block; goto next_block;
default: default:
goto err; return ret;
} }
if (le64_to_cpu(j->seq) > ja->highest_seq_found) { if (le64_to_cpu(j->seq) > ja->highest_seq_found) {
@ -1126,22 +1136,20 @@ static int journal_read_bucket(struct bch_dev *ca,
* bucket: * bucket:
*/ */
if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket]) if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket])
goto out; return 0;
ja->bucket_seq[bucket] = le64_to_cpu(j->seq); ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
enum bch_csum_type csum_type = JSET_CSUM_TYPE(j);
struct bch_csum csum; struct bch_csum csum;
csum_good = jset_csum_good(c, j, &csum); csum_good = jset_csum_good(c, j, &csum);
bch2_account_io_completion(ca, BCH_MEMBER_ERROR_checksum, 0, csum_good); bch2_account_io_completion(ca, BCH_MEMBER_ERROR_checksum, 0, csum_good);
if (!csum_good) { if (!csum_good) {
bch_err_dev_ratelimited(ca, "%s", /*
(printbuf_reset(&err), * Don't print an error here, we'll print the error
prt_str(&err, "journal "), * later if we need this journal entry
bch2_csum_err_msg(&err, csum_type, j->csum, csum), */
err.buf));
saw_bad = true; saw_bad = true;
} }
@ -1153,6 +1161,7 @@ static int journal_read_bucket(struct bch_dev *ca,
mutex_lock(&jlist->lock); mutex_lock(&jlist->lock);
ret = journal_entry_add(c, ca, (struct journal_ptr) { ret = journal_entry_add(c, ca, (struct journal_ptr) {
.csum_good = csum_good, .csum_good = csum_good,
.csum = csum,
.dev = ca->dev_idx, .dev = ca->dev_idx,
.bucket = bucket, .bucket = bucket,
.bucket_offset = offset - .bucket_offset = offset -
@ -1167,7 +1176,7 @@ static int journal_read_bucket(struct bch_dev *ca,
case JOURNAL_ENTRY_ADD_OUT_OF_RANGE: case JOURNAL_ENTRY_ADD_OUT_OF_RANGE:
break; break;
default: default:
goto err; return ret;
} }
next_block: next_block:
pr_debug("next"); pr_debug("next");
@ -1176,11 +1185,7 @@ static int journal_read_bucket(struct bch_dev *ca,
j = ((void *) j) + (sectors << 9); j = ((void *) j) + (sectors << 9);
} }
out: return 0;
ret = 0;
err:
printbuf_exit(&err);
return ret;
} }
static CLOSURE_CALLBACK(bch2_journal_read_device) static CLOSURE_CALLBACK(bch2_journal_read_device)
@ -1229,13 +1234,105 @@ static CLOSURE_CALLBACK(bch2_journal_read_device)
goto out; goto out;
} }
noinline_for_stack
static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_replay *j)
{
struct printbuf buf = PRINTBUF;
enum bch_csum_type csum_type = JSET_CSUM_TYPE(&j->j);
bool have_good = false;
prt_printf(&buf, "invalid journal checksum(s) at seq %llu ", le64_to_cpu(j->j.seq));
bch2_journal_datetime_to_text(&buf, &j->j);
prt_newline(&buf);
darray_for_each(j->ptrs, ptr)
if (!ptr->csum_good) {
bch2_journal_ptr_to_text(&buf, c, ptr);
prt_char(&buf, ' ');
bch2_csum_to_text(&buf, csum_type, ptr->csum);
prt_newline(&buf);
} else {
have_good = true;
}
prt_printf(&buf, "should be ");
bch2_csum_to_text(&buf, csum_type, j->j.csum);
if (have_good)
prt_printf(&buf, "\n(had good copy on another device)");
bch2_print_str(c, KERN_ERR, buf.buf);
printbuf_exit(&buf);
}
noinline_for_stack
static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 end_seq)
{
struct printbuf buf = PRINTBUF;
int ret = 0;
struct genradix_iter radix_iter;
struct journal_replay *i, **_i, *prev = NULL;
u64 seq = start_seq;
genradix_for_each(&c->journal_entries, radix_iter, _i) {
i = *_i;
if (journal_replay_ignore(i))
continue;
BUG_ON(seq > le64_to_cpu(i->j.seq));
while (seq < le64_to_cpu(i->j.seq)) {
while (seq < le64_to_cpu(i->j.seq) &&
bch2_journal_seq_is_blacklisted(c, seq, false))
seq++;
if (seq == le64_to_cpu(i->j.seq))
break;
u64 missing_start = seq;
while (seq < le64_to_cpu(i->j.seq) &&
!bch2_journal_seq_is_blacklisted(c, seq, false))
seq++;
u64 missing_end = seq - 1;
printbuf_reset(&buf);
prt_printf(&buf, "journal entries %llu-%llu missing! (replaying %llu-%llu)",
missing_start, missing_end,
start_seq, end_seq);
prt_printf(&buf, "\nprev at ");
if (prev) {
bch2_journal_ptrs_to_text(&buf, c, prev);
prt_printf(&buf, " size %zu", vstruct_sectors(&prev->j, c->block_bits));
} else
prt_printf(&buf, "(none)");
prt_printf(&buf, "\nnext at ");
bch2_journal_ptrs_to_text(&buf, c, i);
prt_printf(&buf, ", continue?");
fsck_err(c, journal_entries_missing, "%s", buf.buf);
}
prev = i;
seq++;
}
fsck_err:
printbuf_exit(&buf);
return ret;
}
int bch2_journal_read(struct bch_fs *c, int bch2_journal_read(struct bch_fs *c,
u64 *last_seq, u64 *last_seq,
u64 *blacklist_seq, u64 *blacklist_seq,
u64 *start_seq) u64 *start_seq)
{ {
struct journal_list jlist; struct journal_list jlist;
struct journal_replay *i, **_i, *prev = NULL; struct journal_replay *i, **_i;
struct genradix_iter radix_iter; struct genradix_iter radix_iter;
struct printbuf buf = PRINTBUF; struct printbuf buf = PRINTBUF;
bool degraded = false, last_write_torn = false; bool degraded = false, last_write_torn = false;
@ -1354,56 +1451,9 @@ int bch2_journal_read(struct bch_fs *c,
} }
} }
/* Check for missing entries: */ ret = bch2_journal_check_for_missing(c, *last_seq, *blacklist_seq - 1);
seq = *last_seq; if (ret)
genradix_for_each(&c->journal_entries, radix_iter, _i) { goto err;
i = *_i;
if (journal_replay_ignore(i))
continue;
BUG_ON(seq > le64_to_cpu(i->j.seq));
while (seq < le64_to_cpu(i->j.seq)) {
u64 missing_start, missing_end;
struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
while (seq < le64_to_cpu(i->j.seq) &&
bch2_journal_seq_is_blacklisted(c, seq, false))
seq++;
if (seq == le64_to_cpu(i->j.seq))
break;
missing_start = seq;
while (seq < le64_to_cpu(i->j.seq) &&
!bch2_journal_seq_is_blacklisted(c, seq, false))
seq++;
if (prev) {
bch2_journal_ptrs_to_text(&buf1, c, prev);
prt_printf(&buf1, " size %zu", vstruct_sectors(&prev->j, c->block_bits));
} else
prt_printf(&buf1, "(none)");
bch2_journal_ptrs_to_text(&buf2, c, i);
missing_end = seq - 1;
fsck_err(c, journal_entries_missing,
"journal entries %llu-%llu missing! (replaying %llu-%llu)\n"
"prev at %s\n"
"next at %s, continue?",
missing_start, missing_end,
*last_seq, *blacklist_seq - 1,
buf1.buf, buf2.buf);
printbuf_exit(&buf1);
printbuf_exit(&buf2);
}
prev = i;
seq++;
}
genradix_for_each(&c->journal_entries, radix_iter, _i) { genradix_for_each(&c->journal_entries, radix_iter, _i) {
union bch_replicas_padded replicas = { union bch_replicas_padded replicas = {
@ -1416,15 +1466,15 @@ int bch2_journal_read(struct bch_fs *c,
if (journal_replay_ignore(i)) if (journal_replay_ignore(i))
continue; continue;
darray_for_each(i->ptrs, ptr) { /*
struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); * Don't print checksum errors until we know we're going to use
* a given journal entry:
if (!ptr->csum_good) */
bch_err_dev_offset(ca, ptr->sector, darray_for_each(i->ptrs, ptr)
"invalid journal checksum, seq %llu%s", if (!ptr->csum_good) {
le64_to_cpu(i->j.seq), bch2_journal_print_checksum_error(c, i);
i->csum_good ? " (had good copy on another device)" : ""); break;
} }
ret = jset_validate(c, ret = jset_validate(c,
bch2_dev_have_ref(c, i->ptrs.data[0].dev), bch2_dev_have_ref(c, i->ptrs.data[0].dev),

View file

@ -9,6 +9,7 @@ void bch2_journal_pos_from_member_info_resume(struct bch_fs *);
struct journal_ptr { struct journal_ptr {
bool csum_good; bool csum_good;
struct bch_csum csum;
u8 dev; u8 dev;
u32 bucket; u32 bucket;
u32 bucket_offset; u32 bucket_offset;