Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
ahrens committed Aug 31, 2020
1 parent 0ccbc9e commit a0baa47
Showing 1 changed file with 32 additions and 60 deletions.
92 changes: 32 additions & 60 deletions module/zfs/vdev_raidz.c
Original file line number Diff line number Diff line change
Expand Up @@ -3119,6 +3119,28 @@ raidz_reflow_read_done(zio_t *zio)
zio_nowait(zio_unique_parent(zio));
}

/*
 * Advance the in-memory reflow progress of `vre` to `offset` and remember
 * that value in the per-txg slot selected by the txg of `tx`.
 *
 * An `offset` of zero is ignored.  A zero per-txg slot is what this function
 * treats as "raidz_reflow_sync not yet dispatched for this txg", so zero is
 * never stored as a progress value.
 *
 * vre_offset is updated under vre_lock and is asserted not to move
 * backwards.  The first nonzero update that finds the slot empty dispatches
 * raidz_reflow_sync via dsl_sync_task_nowait(); subsequent updates only
 * overwrite the slot.
 *
 * NOTE(review): raidz_reflow_sync is presumably what persists the recorded
 * progress to disk -- confirm against its definition.
 */
static void
raidz_reflow_record_progress(vdev_raidz_expand_t *vre, uint64_t offset,
    dmu_tx_t *tx)
{
	int slot = dmu_tx_get_txg(tx) & TXG_MASK;
	spa_t *reflow_spa = dmu_tx_pool(tx)->dp_spa;

	if (offset != 0) {
		/* Publish the new progress point. */
		mutex_enter(&vre->vre_lock);
		ASSERT3U(vre->vre_offset, <=, offset);
		vre->vre_offset = offset;
		mutex_exit(&vre->vre_lock);

		/* An empty slot means no sync task was dispatched yet. */
		if (vre->vre_offset_pertxg[slot] == 0) {
			dsl_sync_task_nowait(dmu_tx_pool(tx),
			    raidz_reflow_sync, reflow_spa, 0,
			    ZFS_SPACE_CHECK_NONE, tx);
		}

		vre->vre_offset_pertxg[slot] = offset;
	}
}

static boolean_t
raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, range_tree_t *rt,
dmu_tx_t *tx)
Expand All @@ -3141,28 +3163,20 @@ raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, range_tree_t *rt,
int old_children = vd->vdev_children - 1;

/*
* If this would cause us to pass a block whose progress has not
* yet been committed to disk, return TRUE indicating that we need
* to try again in the next txg, and advance only to the point we
* are able. Otherwise a subsequent write into the unallocated region
* we are skipping could cause an overlap.
* We can only progress to the point that writes will not overlap with
* blocks whose progress has not yet been recorded on disk
* (vre_offset_phys). Note that even if we are skipping over a large
* unallocated region, we can't move the on-disk progress to `offset`,
* because concurrent writes/allocations could still use the
* currently-unallocated region.
*/
uint64_t vre_offset_phys_blkid =
MAX(old_children, vre->vre_offset_phys >> ashift);
/*
* We can't overwrite this block.
*/
uint64_t next_overwrite_blkid = vre_offset_phys_blkid +
vre_offset_phys_blkid / old_children;
if (blkid >= next_overwrite_blkid) {
mutex_enter(&vre->vre_lock);
vre->vre_offset = next_overwrite_blkid << ashift;
mutex_exit(&vre->vre_lock);
if (vre->vre_offset > 0 && vre->vre_offset_pertxg[txgoff] == 0) {
dsl_sync_task_nowait(dmu_tx_pool(tx), raidz_reflow_sync,
spa, 0, ZFS_SPACE_CHECK_NONE, tx);
}
vre->vre_offset_pertxg[txgoff] = vre->vre_offset;
raidz_reflow_record_progress(vre,
next_overwrite_blkid << ashift, tx);

zfs_dbgmsg("copying offset %llu, vre_offset_phys %llu, "
"max_overwrite = %llu wait for txg %llu",
Expand All @@ -3173,40 +3187,6 @@ raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, range_tree_t *rt,
return (B_TRUE);
}

/*
* Record the fact that we've completed up to the beginning
* of this segment. This is important since there could be
* an unallocated segment preceding this, and the overwrite-check
* code needs to know that we have processed up to this point.
*/
mutex_enter(&vre->vre_lock);
vre->vre_offset = offset;
mutex_exit(&vre->vre_lock);
if (vre->vre_offset > 0 && vre->vre_offset_pertxg[txgoff] == 0) {
dsl_sync_task_nowait(dmu_tx_pool(tx), raidz_reflow_sync,
spa, 0, ZFS_SPACE_CHECK_NONE, tx);
}
vre->vre_offset_pertxg[txgoff] = vre->vre_offset;

/*
* If this would cause us to overwrite a block whose progress has not
* yet been committed to disk, return TRUE indicating that we need
* to try again in the next txg.
*/
uint64_t overwrite_blkid =
(blkid / vd->vdev_children) * old_children +
(blkid % vd->vdev_children);
/* XXX allow overwrite of first row for now */
if (blkid > vd->vdev_children &&
overwrite_blkid << ashift >= vre->vre_offset_phys) {
zfs_dbgmsg("copying offset %llu, vre_offset_phys %llu, "
"wait for txg %llu",
(long long)offset,
(long long)vre->vre_offset_phys,
(long long)dmu_tx_get_txg(tx));
return (B_TRUE);
}

range_tree_remove(rt, offset, length);

raidz_reflow_arg_t *rra = kmem_zalloc(sizeof (*rra), KM_SLEEP);
Expand All @@ -3217,20 +3197,12 @@ raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, range_tree_t *rt,
zfs_dbgmsg("initiating reflow write offset=%llu length=%llu",
offset, length);

raidz_reflow_record_progress(vre, offset + length, tx);

mutex_enter(&vre->vre_lock);
ASSERT3U(vre->vre_offset, <=, offset);
vre->vre_offset = offset + length;
vre->vre_outstanding_bytes += length;
mutex_exit(&vre->vre_lock);

#if 0 /* XXX already done above */
if (vre->vre_offset_pertxg[txgoff] == 0) {
dsl_sync_task_nowait(dmu_tx_pool(tx), raidz_reflow_sync,
spa, 0, ZFS_SPACE_CHECK_NONE, tx);
}
vre->vre_offset_pertxg[txgoff] = vre->vre_offset;
#endif

/*
* SCL_STATE will be released when the read and write are done,
* by raidz_reflow_write_done().
Expand Down

0 comments on commit a0baa47

Please sign in to comment.