From 8849c6dc127d8252d959978dbd6e13f81a4c73eb Mon Sep 17 00:00:00 2001
From: Brian Atkinson
Date: Tue, 29 Jun 2021 14:00:49 -0600
Subject: [PATCH] Creating gang ABDs for Raidz optional IOs

In order to reduce contention on the vq_lock, optional no data blocks
for Raidz are put into gang ABDs. This allows for a reduction in the
number of IOs issued down to the children VDEVs, reducing contention on
the vq_lock when issuing IO for skip sectors.

Signed-off-by: Brian Atkinson
---
 module/zfs/vdev_raidz.c | 88 ++++++++++++++++++++++++++++++----------
 1 file changed, 66 insertions(+), 22 deletions(-)

diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c
index 1feebf7089b4..b8ec2847bb07 100644
--- a/module/zfs/vdev_raidz.c
+++ b/module/zfs/vdev_raidz.c
@@ -1485,6 +1485,18 @@ vdev_raidz_child_done(zio_t *zio)
 	rc->rc_error = zio->io_error;
 	rc->rc_tried = 1;
 	rc->rc_skipped = 0;
+
+	/*
+	 * If we created a gang ABD to aggregate I/Os for writes we will
+	 * free the gang ABD here and reset the column's ABD to the original
+	 * ABD.
+	 */
+	if (zio->io_type == ZIO_TYPE_WRITE && abd_is_gang(rc->rc_abd)) {
+		ASSERT3P(rc->rc_orig_data, !=, rc->rc_abd);
+		abd_free(rc->rc_abd);
+		rc->rc_abd = rc->rc_orig_data;
+		rc->rc_orig_data = NULL;
+	}
 }
 
 static void
@@ -1525,41 +1537,73 @@ vdev_raidz_io_start_write(zio_t *zio, raidz_row_t *rr, uint64_t ashift)
 {
 	vdev_t *vd = zio->io_vd;
 	raidz_map_t *rm = zio->io_vsd;
-	int c, i;
+	int c, i = 0, skip_first_cols = -1;
+	abd_t *abd;
 
 	vdev_raidz_generate_parity_row(rm, rr);
 
-	for (int c = 0; c < rr->rr_cols; c++) {
+	IMPLY(rm->rm_nskip > 0, rm->rm_skipstart < rr->rr_scols);
+
+	/*
+	 * Skip sectors occupy rm_nskip consecutive columns beginning at
+	 * rm_skipstart. When that run extends past the last column it wraps
+	 * around, and skip_first_cols is the number of leading columns
+	 * (0 .. skip_first_cols - 1) that receive the wrapped skip sectors.
+	 */
+	if (rr->rr_scols < (rm->rm_skipstart + rm->rm_nskip)) {
+		skip_first_cols =
+		    (rm->rm_skipstart + rm->rm_nskip) % rr->rr_scols;
+	}
+
+	ASSERT3S(skip_first_cols, <, rr->rr_scols);
+	IMPLY(rm->rm_nskip == 0, skip_first_cols == -1);
+
+	for (c = 0; c < rr->rr_scols; c++) {
 		raidz_col_t *rc = &rr->rr_col[c];
-		if (rc->rc_size == 0)
-			continue;
 
 		/* Verify physical to logical translation */
 		vdev_raidz_io_verify(vd, rr, c);
 
+		/*
+		 * Generate I/O for skip sectors to improve aggregation
+		 * contiguity. We will use gang ABDs to reduce contention
+		 * on the children VDEV queue locks (vq_lock) by issuing
+		 * a single I/O that contains the data and skip sectors.
+		 */
+		if ((skip_first_cols > -1 && c < skip_first_cols &&
+		    i < rm->rm_nskip) || (c >= rm->rm_skipstart &&
+		    i < rm->rm_nskip)) {
+			abd = abd_alloc_gang();
+			if (rc->rc_size > 0) {
+				abd_gang_add(abd, rc->rc_abd, B_FALSE);
+			} else {
+				ASSERT3P(rc->rc_abd, ==, NULL);
+			}
+			abd_gang_add(abd, abd_get_zeros(1ULL << ashift),
+			    B_TRUE);
+
+			/*
+			 * Store original ABD so the gang ABD can be freed in
+			 * vdev_raidz_child_done().
+			 */
+			ASSERT3P(rc->rc_orig_data, ==, NULL);
+			rc->rc_orig_data = rc->rc_abd;
+			rc->rc_abd = abd;
+			i++;
+		} else {
+			/*
+			 * I/O does not contain any skip sectors.
+			 */
+			abd = rc->rc_abd;
+		}
+
 		zio_nowait(zio_vdev_child_io(zio, NULL,
 		    vd->vdev_child[rc->rc_devidx], rc->rc_offset,
-		    rc->rc_abd, rc->rc_size, zio->io_type, zio->io_priority,
+		    abd, abd_get_size(abd), zio->io_type, zio->io_priority,
 		    0, vdev_raidz_child_done, rc));
 	}
 
-	/*
-	 * Generate optional I/Os for skip sectors to improve aggregation
-	 * contiguity.
-	 */
-	for (c = rm->rm_skipstart, i = 0; i < rm->rm_nskip; c++, i++) {
-		ASSERT(c <= rr->rr_scols);
-		if (c == rr->rr_scols)
-			c = 0;
-
-		raidz_col_t *rc = &rr->rr_col[c];
-		vdev_t *cvd = vd->vdev_child[rc->rc_devidx];
-
-		zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
-		    rc->rc_offset + rc->rc_size, NULL, 1ULL << ashift,
-		    zio->io_type, zio->io_priority,
-		    ZIO_FLAG_NODATA | ZIO_FLAG_OPTIONAL, NULL, NULL));
-	}
+	ASSERT3S(i, ==, rm->rm_nskip);
 }
 
 static void