Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Verify dRAID empty sectors #12857

Merged
merged 1 commit into from
Jan 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/sys/vdev_draid.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ extern boolean_t vdev_draid_readable(vdev_t *, uint64_t);
extern boolean_t vdev_draid_missing(vdev_t *, uint64_t, uint64_t, uint64_t);
extern uint64_t vdev_draid_asize_to_psize(vdev_t *, uint64_t);
extern void vdev_draid_map_alloc_empty(zio_t *, struct raidz_row *);
extern int vdev_draid_map_verify_empty(zio_t *, struct raidz_row *);
extern nvlist_t *vdev_draid_read_config_spare(vdev_t *);

/* Functions for dRAID distributed spares. */
Expand Down
2 changes: 2 additions & 0 deletions include/sys/vdev_raidz.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ extern "C" {
#endif

struct zio;
struct raidz_col;
struct raidz_row;
struct raidz_map;
#if !defined(_KERNEL)
Expand All @@ -49,6 +50,7 @@ void vdev_raidz_generate_parity(struct raidz_map *);
void vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
void vdev_raidz_child_done(zio_t *);
void vdev_raidz_io_done(zio_t *);
void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *);

extern const zio_vsd_ops_t vdev_raidz_vsd_ops;

Expand Down
47 changes: 47 additions & 0 deletions module/zfs/vdev_draid.c
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,53 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr)
ASSERT3U(skip_off, ==, rr->rr_nempty * skip_size);
}

/*
* Verify that all empty sectors are zero filled before using them to
* calculate parity. Otherwise, silent corruption in an empty sector will
* result in bad parity being generated. That bad parity will then be
* considered authoritative and overwrite the good parity on disk. This
* is possible because the checksum is only calculated over the data,
* thus it cannot be used to detect damage in empty sectors.
*/
int
vdev_draid_map_verify_empty(zio_t *zio, raidz_row_t *rr)
{
uint64_t skip_size = 1ULL << zio->io_vd->vdev_top->vdev_ashift;
uint64_t parity_size = rr->rr_col[0].rc_size;
uint64_t skip_off = parity_size - skip_size;
uint64_t empty_off = 0;
int ret = 0;

ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
ASSERT3P(rr->rr_abd_empty, !=, NULL);
ASSERT3U(rr->rr_bigcols, >, 0);

void *zero_buf = kmem_zalloc(skip_size, KM_SLEEP);
behlendorf marked this conversation as resolved.
Show resolved Hide resolved

for (int c = rr->rr_bigcols; c < rr->rr_cols; c++) {
raidz_col_t *rc = &rr->rr_col[c];

ASSERT3P(rc->rc_abd, !=, NULL);
ASSERT3U(rc->rc_size, ==, parity_size);

if (abd_cmp_buf_off(rc->rc_abd, zero_buf, skip_off,
skip_size) != 0) {
vdev_raidz_checksum_error(zio, rc, rc->rc_abd);
abd_zero_off(rc->rc_abd, skip_off, skip_size);
rc->rc_error = SET_ERROR(ECKSUM);
ret++;
}

empty_off += skip_size;
}

ASSERT3U(empty_off, ==, abd_get_size(rr->rr_abd_empty));

kmem_free(zero_buf, skip_size);

return (ret);
}

/*
* Given a logical address within a dRAID configuration, return the physical
* address on the first drive in the group that this address maps to
Expand Down
16 changes: 11 additions & 5 deletions module/zfs/vdev_raidz.c
Original file line number Diff line number Diff line change
Expand Up @@ -1752,8 +1752,8 @@ vdev_raidz_io_start(zio_t *zio)
/*
* Report a checksum error for a child of a RAID-Z device.
*/
static void
raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
void
vdev_raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
{
vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx];

Expand Down Expand Up @@ -1823,6 +1823,13 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
abd_copy(orig[c], rc->rc_abd, rc->rc_size);
}

/*
* Verify any empty sectors are zero filled to ensure the parity
* is calculated correctly even if these non-data sectors are damaged.
*/
if (rr->rr_nempty && rr->rr_abd_empty != NULL)
ret += vdev_draid_map_verify_empty(zio, rr);

/*
* Regenerates parity even for !tried||rc_error!=0 columns. This
* isn't harmful but it does have the side effect of fixing stuff
Expand All @@ -1837,7 +1844,7 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
continue;

if (abd_cmp(orig[c], rc->rc_abd) != 0) {
raidz_checksum_error(zio, rc, orig[c]);
vdev_raidz_checksum_error(zio, rc, orig[c]);
rc->rc_error = SET_ERROR(ECKSUM);
ret++;
}
Expand Down Expand Up @@ -1897,7 +1904,6 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
(zio->io_flags & ZIO_FLAG_RESILVER)) {
int n = raidz_parity_verify(zio, rr);
unexpected_errors += n;
ASSERT3U(parity_errors + n, <=, rr->rr_firstdatacol);
}

if (zio->io_error == 0 && spa_writeable(zio->io_spa) &&
Expand Down Expand Up @@ -2023,7 +2029,7 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
*/
if (rc->rc_error == 0 &&
c >= rr->rr_firstdatacol) {
raidz_checksum_error(zio,
vdev_raidz_checksum_error(zio,
rc, rc->rc_orig_data);
rc->rc_error =
SET_ERROR(ECKSUM);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,12 @@
# in zpool status.
#
# STRATEGY:
# 1. Create a raidz or mirror pool
# 1. Create a mirror, raidz, or draid pool
# 2. Inject read/write IO errors or checksum errors
# 3. Verify the number of errors in zpool status match the corresponding
# number of error events.
# 4. Repeat for all combinations of raidz/mirror and io/checksum errors.
# 4. Repeat for all combinations of mirror/raidz/draid and io/checksum
# errors.
#

. $STF_SUITE/include/libtest.shlib
Expand Down Expand Up @@ -74,7 +75,7 @@ log_must mkdir -p $MOUNTDIR

# Run error test on a specific type of pool
#
# $1: pool - raidz, mirror
# $1: pool - mirror, raidz, draid
# $2: test type - corrupt (checksum error), io
# $3: read, write
function do_test
Expand Down Expand Up @@ -142,8 +143,8 @@ function do_test
log_must zpool destroy $POOL
}

# Test all types of errors on mirror and raidz pools
for pooltype in mirror raidz ; do
# Test all types of errors on mirror, raidz, and draid pools
for pooltype in mirror raidz draid; do
do_test $pooltype corrupt read
do_test $pooltype io read
do_test $pooltype io write
Expand Down