Skip to content

Commit

Permalink
Creating gang ABDs for Raidz optional IOs
Browse files Browse the repository at this point in the history
In order to reduce contention on the vq_lock, optional no-data blocks
for Raidz are put into gang ABDs. This reduces the number of I/Os
issued down to the child VDEVs, which in turn reduces contention on
the vq_lock when issuing I/O for skip sectors.

Signed-off-by: Brian Atkinson <batkinson@lanl.gov>
  • Loading branch information
bwatkinson committed Jul 8, 2021
1 parent bdd11cb commit 2a4025d
Showing 1 changed file with 61 additions and 22 deletions.
83 changes: 61 additions & 22 deletions module/zfs/vdev_raidz.c
Original file line number Diff line number Diff line change
Expand Up @@ -1485,6 +1485,18 @@ vdev_raidz_child_done(zio_t *zio)
rc->rc_error = zio->io_error;
rc->rc_tried = 1;
rc->rc_skipped = 0;

/*
* If we created a gang ABD to aggregate I/Os for writes, we will
* free the gang ABD here and reset the column's ABD to the original
* ABD.
*/
if (zio->io_type == ZIO_TYPE_WRITE && abd_is_gang(rc->rc_abd)) {
ASSERT3P(rc->rc_orig_data, !=, rc->rc_abd);
abd_free(rc->rc_abd);
rc->rc_abd = rc->rc_orig_data;
rc->rc_orig_data = NULL;
}
}

static void
Expand Down Expand Up @@ -1525,41 +1537,68 @@ vdev_raidz_io_start_write(zio_t *zio, raidz_row_t *rr, uint64_t ashift)
{
vdev_t *vd = zio->io_vd;
raidz_map_t *rm = zio->io_vsd;
int c, i;
int c, i = 0, skip_first_cols = -1;

vdev_raidz_generate_parity_row(rm, rr);

for (int c = 0; c < rr->rr_cols; c++) {
IMPLY(rm->rm_nskip > 0, rm->rm_skipstart < rr->rr_scols);

if (rr->rr_scols < (rm->rm_skipstart + rm->rm_nskip)) {
skip_first_cols =
(rm->rm_skipstart + rm->rm_nskip) % rr->rr_scols;
}

ASSERT3S(skip_first_cols, <, rr->rr_scols);
IMPLY(rm->rm_nskip == 0, skip_first_cols = -1);

for (c = 0; c < rr->rr_scols; c++) {
abd_t *abd = NULL;
raidz_col_t *rc = &rr->rr_col[c];
if (rc->rc_size == 0)
continue;

/* Verify physical to logical translation */
vdev_raidz_io_verify(vd, rr, c);

/*
* Generate I/O for skip sectors to improve aggregation
* contiguity. We will use gang ABDs to reduce contention
* on the child VDEV queue locks (vq_lock) by issuing
* a single I/O that contains the data and skip sectors.
*/
if (((skip_first_cols > -1 && c < skip_first_cols) ||
c >= rm->rm_skipstart) && i < rm->rm_nskip) {
abd = abd_alloc_gang();
if (rc->rc_size > 0) {
abd_gang_add(abd, rc->rc_abd, B_FALSE);
} else {
ASSERT3P(rc->rc_abd, ==, NULL);
}
abd_gang_add(abd, abd_get_zeros(1ULL << ashift),
B_TRUE);

/*
* Store original ABD so the gang ABD can be freed in
* vdev_raidz_child_done().
*/
ASSERT3P(rc->rc_orig_data, ==, NULL);
rc->rc_orig_data = rc->rc_abd;
rc->rc_abd = abd;
i++;
} else {
/*
* I/O does not contain any skip sectors.
*/
abd = rc->rc_abd;
}

ASSERT3P(abd, !=, NULL);

zio_nowait(zio_vdev_child_io(zio, NULL,
vd->vdev_child[rc->rc_devidx], rc->rc_offset,
rc->rc_abd, rc->rc_size, zio->io_type, zio->io_priority,
abd, abd_get_size(abd), zio->io_type, zio->io_priority,
0, vdev_raidz_child_done, rc));
}

/*
* Generate optional I/Os for skip sectors to improve aggregation
* contiguity.
*/
for (c = rm->rm_skipstart, i = 0; i < rm->rm_nskip; c++, i++) {
ASSERT(c <= rr->rr_scols);
if (c == rr->rr_scols)
c = 0;

raidz_col_t *rc = &rr->rr_col[c];
vdev_t *cvd = vd->vdev_child[rc->rc_devidx];

zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
rc->rc_offset + rc->rc_size, NULL, 1ULL << ashift,
zio->io_type, zio->io_priority,
ZIO_FLAG_NODATA | ZIO_FLAG_OPTIONAL, NULL, NULL));
}
ASSERT3S(i, ==, rm->rm_nskip);
}

static void
Expand Down

0 comments on commit 2a4025d

Please sign in to comment.