From 636b074e162adc308bf33286d42e52076636284d Mon Sep 17 00:00:00 2001 From: Mayank <33252549+mynktl@users.noreply.github.com> Date: Tue, 19 Feb 2019 14:58:31 +0530 Subject: [PATCH] cherrypick(v0.8.x): deletion of stale clone dataset (#202) * [DE249]fix(zrepl): destroying all internal snapshots of rebuild clone dataset once rebuild completes (#199) * [TC57] fix(stale clone): delete stale volume in timer fn and its test cases (#200) * fix(memleak): freeing string in error case in uzfs_zinfo_destroy_stale_clone (#201) Signed-off-by: mayank --- include/sys/uzfs_zvol.h | 11 +++++ include/uzfs_rebuilding.h | 4 ++ include/zrepl_mgmt.h | 6 +++ lib/libzpool/uzfs_rebuilding.c | 46 +++++++++++++++++ lib/libzpool/zrepl_mgmt.c | 72 +++++++++++++++++++++++++++ lib/libzrepl/data_conn.c | 24 +++++++-- module/zfs/zvol.c | 4 +- tests/cbtest/gtest/test_uzfs.cc | 87 +++++++++++++++++++++++++++++++-- 8 files changed, 243 insertions(+), 11 deletions(-) diff --git a/include/sys/uzfs_zvol.h b/include/sys/uzfs_zvol.h index ae80ac4cb3f5..a678839b80e1 100644 --- a/include/sys/uzfs_zvol.h +++ b/include/sys/uzfs_zvol.h @@ -59,6 +59,14 @@ typedef struct zvol_rebuild_info { /* peer replica cnt whose rebuild is done and failure */ uint16_t rebuild_failed_cnt; + + /* + * does stale clone exist? + * If stale_clone_exist set to non-zero then timer thread will delete + * the clone and related_snapshot. + * rebuilding thread will set stale_clone_exist to 1. + */ + uint8_t stale_clone_exist; } zvol_rebuild_info_t; /* @@ -113,6 +121,9 @@ typedef struct zvol_state zvol_state_t; #define ZVOL_IS_REBUILDING_FAILED(zv) \ (zv->rebuild_info.zv_rebuild_status == ZVOL_REBUILDING_FAILED) +#define ZVOL_HAS_STALE_CLONE(zv) \ + (zv->rebuild_info.stale_clone_exist) + extern int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio); const char *rebuild_status_to_str(zvol_rebuild_status_t status); diff --git a/include/uzfs_rebuilding.h b/include/uzfs_rebuilding.h index 1632dd95c033..536d664743e0 100644 --- a/include/uzfs_rebuilding.h +++ b/include/uzfs_rebuilding.h @@ -61,6 +61,10 @@ int uzfs_zvol_get_or_create_internal_clone(zvol_state_t *zv, int uzfs_zvol_release_internal_clone(zvol_state_t *zv, zvol_state_t *snap_zv, zvol_state_t *clone_zv); +/* + * To remove all internal snapshots of a dataset + */ +int uzfs_destroy_all_internal_snapshots(zvol_state_t *zv); boolean_t is_stale_clone(zvol_state_t *); #ifdef __cplusplus diff --git a/include/zrepl_mgmt.h b/include/zrepl_mgmt.h index df6948a9ab49..8b3390368360 100644 --- a/include/zrepl_mgmt.h +++ b/include/zrepl_mgmt.h @@ -75,6 +75,7 @@ typedef struct inject_delay_s { int downgraded_replica_rebuild_size_set; int io_receiver_exit; int helping_replica_rebuild_complete; + int rebuild_complete; } inject_delay_t; typedef struct inject_rebuild_error_s { @@ -276,6 +277,11 @@ uzfs_zinfo_take_refcnt(zvol_info_t *zinfo) atomic_inc_64(&zinfo->refcnt); } +/* + * To remove the internal stale clone + */ +int uzfs_zinfo_destroy_stale_clone(zvol_info_t *zinfo); + /* * ZAP key for io sequence number */ diff --git a/lib/libzpool/uzfs_rebuilding.c b/lib/libzpool/uzfs_rebuilding.c index 1c05ed4f54c1..8d8f65e77625 100644 --- a/lib/libzpool/uzfs_rebuilding.c +++ b/lib/libzpool/uzfs_rebuilding.c @@ -393,3 +393,49 @@ uzfs_zvol_get_or_create_internal_clone(zvol_state_t *zv, strfree(snapname); return (ret); } + +/* + * To destroy all internal created snapshot + * on a dataset + */ +int +uzfs_destroy_all_internal_snapshots(zvol_state_t *zv) +{ + int ret; + char snapname[MAXNAMELEN]; + objset_t *os; + uint64_t obj = 0, cookie = 0; + + if (!zv || !zv->zv_objset) + return (-1); + + os = zv->zv_objset; + + while (1) { + dsl_pool_config_enter(spa_get_dsl(zv->zv_spa), FTAG); + ret = dmu_snapshot_list_next(os, sizeof (snapname) - 1, + snapname, &obj, &cookie, NULL); + dsl_pool_config_exit(spa_get_dsl(zv->zv_spa), FTAG); + + if (ret) { + if (ret == ENOENT) + ret = 0; + break; + } + + if (!(strcmp(snapname, REBUILD_SNAPSHOT_SNAPNAME) == 0) && + !(strncmp(snapname, IO_DIFF_SNAPNAME, + sizeof (IO_DIFF_SNAPNAME) - 1) == 0)) { + continue; + } + + ret = destroy_snapshot_zv(zv, snapname); + if (ret != 0) { + LOG_ERR("Failed to destroy internal snap(%s) on:%s " + "with err:%d", snapname, zv->zv_name, ret); + break; + } + } + + return (ret); +} diff --git a/lib/libzpool/zrepl_mgmt.c b/lib/libzpool/zrepl_mgmt.c index 4722a55cc502..0c07cf405e6d 100644 --- a/lib/libzpool/zrepl_mgmt.c +++ b/lib/libzpool/zrepl_mgmt.c @@ -537,6 +537,11 @@ uzfs_zvol_destroy_snapshot_clone(zvol_state_t *zv, zvol_state_t *snap_zv, int ret1 = 0; char *clonename; + if (snap_zv == NULL) { + VERIFY(clone_zv != NULL); + return (0); + } + clonename = kmem_asprintf("%s/%s_%s", spa_name(zv->zv_spa), strchr(zv->zv_name, '/') + 1, REBUILD_SNAPSHOT_CLONENAME); @@ -544,6 +549,17 @@ uzfs_zvol_destroy_snapshot_clone(zvol_state_t *zv, zvol_state_t *snap_zv, LOG_INFO("Destroying %s and %s(%s) on:%s", snap_zv->zv_name, clone_zv->zv_name, clonename, zv->zv_name); + /* Destroy clone's snapshot */ + ret = uzfs_destroy_all_internal_snapshots(clone_zv); + if (ret != 0) { + LOG_ERR("Rebuild_clone snap destroy failed on:%s" + " with err:%d", zv->zv_name, ret); + } + + /* + * We need to release the snapshot zv so that next hold + * on dataset doesn't fail + */ uzfs_zvol_release_internal_clone(zv, snap_zv, clone_zv); // try_clone_delete_again: @@ -600,3 +616,59 @@ uzfs_zinfo_destroy_internal_clone(zvol_info_t *zinfo) clone_zv); return (ret); } + +/* + * This API is used to delete stale + * cloned volume and backing snapshot. + */ +int +uzfs_zinfo_destroy_stale_clone(zvol_info_t *zinfo) +{ + int ret = 0; + char *clone_subname = NULL; + zvol_state_t *l_snap_zv = NULL, *l_clone_zv = NULL; + zvol_state_t *zv; + + if (!zinfo->main_zv) + return (0); + + zv = zinfo->main_zv; + + ret = get_snapshot_zv(zv, REBUILD_SNAPSHOT_SNAPNAME, + &l_snap_zv, B_FALSE, B_TRUE); + if (ret != 0) { + LOG_ERR("Failed to get info about %s@%s", + zv->zv_name, REBUILD_SNAPSHOT_SNAPNAME); + return (ret); + } + + clone_subname = kmem_asprintf("%s_%s", strchr(zv->zv_name, '/') + 1, + REBUILD_SNAPSHOT_CLONENAME); + + ret = uzfs_open_dataset(zv->zv_spa, clone_subname, &l_clone_zv); + if (ret == 0) { + /* + * If hold on clone dataset fails then we will + * try to delete the clone after sometime. + */ + ret = uzfs_hold_dataset(l_clone_zv); + if (ret != 0) { + LOG_ERR("Failed to hold clone: %d", ret); + strfree(clone_subname); + uzfs_close_dataset(l_clone_zv); + uzfs_close_dataset(l_snap_zv); + return (ret); + } + } else { + uzfs_close_dataset(l_snap_zv); + strfree(clone_subname); + return (ret); + } + + if (!uzfs_zvol_destroy_snapshot_clone(zv, l_snap_zv, l_clone_zv)) + zv->rebuild_info.stale_clone_exist = 0; + + strfree(clone_subname); + + return (ret); +} diff --git a/lib/libzrepl/data_conn.c b/lib/libzrepl/data_conn.c index 2e3558bb874b..f4d93bdeec43 100644 --- a/lib/libzrepl/data_conn.c +++ b/lib/libzrepl/data_conn.c @@ -893,12 +893,23 @@ uzfs_zvol_rebuild_dw_replica(void *arg) close(sfd); } - if (wquiesce) - uzfs_zinfo_destroy_internal_clone(zinfo); +#ifdef DEBUG + if (inject_error.delay.rebuild_complete == 1) + sleep(10); +#endif + + if (wquiesce) { + if (uzfs_zinfo_destroy_internal_clone(zinfo)) { + mutex_enter(&zinfo->main_zv->rebuild_mtx); + zinfo->main_zv->rebuild_info.stale_clone_exist++; + mutex_exit(&zinfo->main_zv->rebuild_mtx); + } else { + zinfo->main_zv->rebuild_info.stale_clone_exist = 0; + } + } /* Parent thread have taken refcount, drop it now */ uzfs_zinfo_drop_refcnt(zinfo); - zk_thread_exit(); } @@ -962,6 +973,10 @@ uzfs_zvol_timer_thread(void) next_check = now + zinfo->update_ionum_interval; } + + if (ZVOL_HAS_STALE_CLONE(zinfo->main_zv)) { + uzfs_zinfo_destroy_stale_clone(zinfo); + } } else if (uzfs_zvol_get_status(zinfo->main_zv) == ZVOL_STATUS_DEGRADED && zinfo->main_zv->zv_objset) { @@ -1013,6 +1028,7 @@ uzfs_zvol_timer_thread(void) node_next); kmem_free(n_zinfo, sizeof (*n_zinfo)); } + zk_thread_exit(); } /* @@ -1514,7 +1530,6 @@ uzfs_zvol_rebuild_scanner(void *arg) } if (ZINFO_IS_DEGRADED(zinfo)) zv = zinfo->clone_zv; - rc = uzfs_zvol_create_internal_snapshot(zv, &snap_zv, metadata.io_num); if (rc != 0) { @@ -1651,6 +1666,7 @@ reinitialize_zv_state(zvol_state_t *zv) uzfs_zvol_set_status(zv, ZVOL_STATUS_DEGRADED); uzfs_zvol_set_rebuild_status(zv, ZVOL_REBUILDING_INIT); + zv->rebuild_info.stale_clone_exist = 0; } /* diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 61967769ce65..dc03035ed6cf 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -372,9 +372,9 @@ uzfs_ioc_stats(zfs_cmd_t *zc, nvlist_t *nvl) zv->main_zv->rebuild_info.zv_rebuild_status)); fnvlist_add_uint64(innvl, "isIOAckSenderCreated", - zv->is_io_ack_sender_created); + (zv->is_io_ack_sender_created) ? 1 : 0); fnvlist_add_uint64(innvl, "isIOReceiverCreated", - zv->is_io_receiver_created); + (zv->is_io_receiver_created) ? 1 : 0); fnvlist_add_uint64(innvl, "runningIONum", zv->running_ionum); fnvlist_add_uint64(innvl, "checkpointedIONum", diff --git a/tests/cbtest/gtest/test_uzfs.cc b/tests/cbtest/gtest/test_uzfs.cc index edc36e5663f7..61f5e95b7a28 100644 --- a/tests/cbtest/gtest/test_uzfs.cc +++ b/tests/cbtest/gtest/test_uzfs.cc @@ -2059,6 +2059,87 @@ void verify_ios_from_two_replica(void *arg) zk_thread_exit(); } +static int +check_if_snap_exist(zvol_state_t *zv, char *snap) +{ + int ret; + char snapname[MAXNAMELEN]; + objset_t *os; + uint64_t obj = 0, cookie = 0; + + if (!zv || !zv->zv_objset) + return (-1); + + os = zv->zv_objset; + while (1) { + dsl_pool_config_enter(spa_get_dsl(zv->zv_spa), FTAG); + ret = dmu_snapshot_list_next(os, sizeof (snapname) - 1, + snapname, &obj, &cookie, NULL); + dsl_pool_config_exit(spa_get_dsl(zv->zv_spa), FTAG); + + if (ret) { + if (ret == ENOENT) + ret = 0; + break; + } + if (strcmp(snapname, snap) == 0) { + ret = 1; + break; + } + } + + return (ret); +} + +TEST(uZFSRebuild, TestRebuildSnapDeletion) { + int data_conn_fd1, data_conn_fd3; + rebuild_scanner = &uzfs_mock_rebuild_scanner_abrupt_conn_close; + dw_replica_fn = &uzfs_zvol_rebuild_dw_replica; + io_receiver = &uzfs_zvol_io_receiver; + + zinfo->main_zv->zv_status = ZVOL_STATUS_DEGRADED; + zvol_rebuild_step_size = (1024ULL * 1024ULL * 1024ULL) / 2 + 1000; + do_data_connection(data_conn_fd1, "127.0.0.1", IO_SERVER_PORT, "vol1"); + do_data_connection(data_conn_fd3, "127.0.0.1", IO_SERVER_PORT, "vol3"); + + uint64_t quorum = -1; + EXPECT_EQ(0, dsl_prop_get_integer(zinfo->main_zv->zv_name, + zfs_prop_to_name(ZFS_PROP_QUORUM), &quorum, NULL)); + EXPECT_EQ(quorum, 0); + + /* thread that helps rebuilding exits abruptly just after connects */ + execute_rebuild_test_case("rebuild abrupt", 1, ZVOL_REBUILDING_SNAP, + ZVOL_REBUILDING_FAILED, 4, "vol3"); + close(data_conn_fd1); + + EXPECT_EQ(NULL, !zinfo->clone_zv); + EXPECT_EQ(0, dmu_objset_snapshot_one(zinfo->clone_zv->zv_name, ".io_snap100.2")); + EXPECT_EQ(0, dmu_objset_snapshot_one(zinfo->clone_zv->zv_name, ".io_snap100.1")); + + rebuild_scanner = &uzfs_zvol_rebuild_scanner; + zinfo->main_zv->zv_status = ZVOL_STATUS_DEGRADED; +#ifdef DEBUG + inject_error.delay.rebuild_complete = 1; +#endif + do_data_connection(data_conn_fd1, "127.0.0.1", IO_SERVER_PORT, "vol1"); + execute_rebuild_test_case("complete rebuild with data conn", 15, + ZVOL_REBUILDING_SNAP, ZVOL_REBUILDING_DONE, 4, "vol3"); + + sleep(12); +#ifdef DEBUG + inject_error.delay.rebuild_complete = 0; +#endif + + EXPECT_EQ(0, dsl_prop_get_integer(zinfo->main_zv->zv_name, + zfs_prop_to_name(ZFS_PROP_QUORUM), &quorum, NULL)); + EXPECT_EQ(quorum, 1); + + EXPECT_EQ(0, check_if_snap_exist(zinfo->main_zv, (char *)REBUILD_SNAPSHOT_SNAPNAME)); + close(data_conn_fd1); + close(data_conn_fd3); + sleep(10); +} + TEST(uZFSRebuild, TestErroredRebuild) { replica_writes_io_t wargs = { 0 }; kthread_t *writer_thread, *reader_thread; @@ -2087,11 +2168,6 @@ TEST(uZFSRebuild, TestErroredRebuild) { #ifdef DEBUG inject_error.inject_rebuild_error.dw_replica_rebuild_error_io = (total_ios) / 4; #endif - uint64_t quorum = -1; - EXPECT_EQ(0, dsl_prop_get_integer(zinfo->main_zv->zv_name, - zfs_prop_to_name(ZFS_PROP_QUORUM), &quorum, NULL)); - EXPECT_EQ(quorum, 0); - execute_rebuild_test_case("errored rebuild with data conn", 15, ZVOL_REBUILDING_SNAP, ZVOL_REBUILDING_FAILED, 4, "vol3"); close(wargs.r1_fd); @@ -2130,6 +2206,7 @@ TEST(uZFSRebuild, TestErroredRebuild) { 0, 0); zk_thread_join(writer_thread->t_tid); + uint64_t quorum = 0; EXPECT_EQ(0, dsl_prop_get_integer(zinfo->main_zv->zv_name, zfs_prop_to_name(ZFS_PROP_QUORUM), &quorum, NULL)); EXPECT_EQ(quorum, 1);