From ac02a59aab6cb27d417b866c60d9048a60f6dc26 Mon Sep 17 00:00:00 2001
From: Matthew Ahrens
Date: Wed, 28 Aug 2013 06:45:09 -0500
Subject: [PATCH 1/2] Illumos #2882, #2883, #2900

2882 implement libzfs_core
2883 changing "canmount" property to "on" should not always remount dataset
2900 "zfs snapshot" should be able to create multiple, arbitrary snapshots
     at once

Reviewed by: George Wilson
Reviewed by: Chris Siden
Reviewed by: Garrett D'Amore
Reviewed by: Bill Pijewski
Reviewed by: Dan Kruchinin
Approved by: Eric Schrock

References:
  illumos/illumos-gate@4445fffbbb1ea25fd0e9ea68b9380dd7a6709025

Ported-by: Tim Chase
Closes #1293

Porting notes:

Add zvol minor device creation to the new zfs_snapshot_nvl function.

Remove the logging of the "release" operation in
dsl_dataset_user_release_sync(). The logging caused a null dereference
because ds->ds_dir is zeroed in dsl_dataset_destroy_sync() and the
logging functions try to get the ds name via the dsl_dataset_name()
function. I've got no idea why this particular code would have worked
in Illumos. This code has subsequently been completely reworked in
Illumos commit 3b2aab1 (3464 zfs synctask code needs restructuring).

Squash some "may be used uninitialized" warnings/errors.

Fix some printf format warnings for %lld and %llu.

Apply a few spa_writeable() changes that were made to Illumos in
illumos/illumos-gate.git@cd1c8b8 as part of the 3112, 3113, 3114 and
3115 fixes.

Add a missing call to fnvlist_free(nvl) in log_internal() that was
added in Illumos to fix issue 3085 but couldn't be ported to ZoL at
the time (zfsonlinux/zfs@9e11c73) because it depended on future work.
---
 cmd/zdb/zdb.c                 |   41 +-
 cmd/zfs/Makefile.am           |    3 +-
 cmd/zfs/zfs_main.c            |  101 +-
 cmd/zhack/zhack.c             |    7 +
 cmd/zpool/zpool_main.c        |  178 ++--
 cmd/ztest/ztest.c             |   29 +-
 configure.ac                  |    1 +
 include/Makefile.am           |    1 +
 include/libzfs.h              |   23 +-
 include/libzfs_core.h         |   62 ++
 include/libzfs_impl.h         |   10 +-
 include/sys/dmu.h             |   24 +-
 include/sys/dmu_objset.h      |   11 +-
 include/sys/dsl_dataset.h     |    7 +-
 include/sys/dsl_deleg.h       |    5 +-
 include/sys/dsl_prop.h        |    3 +-
 include/sys/fs/zfs.h          |   80 +-
 include/sys/rrwlock.h         |    4 +
 include/sys/spa.h             |   32 +-
 include/sys/zfs_context.h     |    1 +
 include/sys/zfs_ioctl.h       |   31 +-
 include/zfs_comutil.h         |    4 +-
 lib/Makefile.am               |    2 +-
 lib/libzfs/Makefile.am        |    1 +
 lib/libzfs/libzfs_config.c    |    4 +-
 lib/libzfs/libzfs_dataset.c   |  372 ++++----
 lib/libzfs/libzfs_diff.c      |    8 +-
 lib/libzfs/libzfs_fru.c       |    2 +-
 lib/libzfs/libzfs_graph.c     |    4 +-
 lib/libzfs/libzfs_import.c    |    2 +-
 lib/libzfs/libzfs_iter.c      |   12 +-
 lib/libzfs/libzfs_pool.c      |  114 ++-
 lib/libzfs/libzfs_sendrecv.c  |   20 +-
 lib/libzfs/libzfs_util.c      |   24 +-
 lib/libzfs_core/Makefile.am   |   15 +
 lib/libzfs_core/libzfs_core.c |  477 ++++++++++
 lib/libzpool/kernel.c         |    6 +
 man/man8/zfs.8                |   10 +-
 module/nvpair/fnvpair.c       |    1 +
 module/zcommon/zfs_comutil.c  |    7 +-
 module/zcommon/zprop_common.c |    6 +-
 module/zfs/dmu_objset.c       |  310 ++++---
 module/zfs/dmu_send.c         |  110 +--
 module/zfs/dmu_tx.c           |    2 +-
 module/zfs/dsl_dataset.c      |  145 ++-
 module/zfs/dsl_deleg.c        |   28 +-
 module/zfs/dsl_dir.c          |   67 +-
 module/zfs/dsl_prop.c         |   27 +-
 module/zfs/dsl_scan.c         |    4 +-
 module/zfs/dsl_synctask.c     |   23 +-
 module/zfs/rrwlock.c          |   13 +
 module/zfs/spa.c              |   54 +-
 module/zfs/spa_history.c      |  228 +++--
 module/zfs/zfs_ctldir.c       |    3 +-
 module/zfs/zfs_ioctl.c        | 1622 ++++++++++++++++++++++-----------
 module/zfs/zfs_vfsops.c       |    6 +-
 56 files changed, 2873 insertions(+), 1514 deletions(-)
 create mode 100644 include/libzfs_core.h
 create mode 100644 lib/libzfs_core/Makefile.am
 create mode
100644 lib/libzfs_core/libzfs_core.c diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 82491ad5f2f3..060498ae2c50 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -57,6 +57,7 @@ #include #include #include +#include #undef ZFS_MAXNAMELEN #include @@ -209,6 +210,27 @@ dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size) nvlist_free(nv); } +/* ARGSUSED */ +static void +dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size) +{ + spa_history_phys_t *shp = data; + + if (shp == NULL) + return; + + (void) printf("\t\tpool_create_len = %llu\n", + (u_longlong_t)shp->sh_pool_create_len); + (void) printf("\t\tphys_max_off = %llu\n", + (u_longlong_t)shp->sh_phys_max_off); + (void) printf("\t\tbof = %llu\n", + (u_longlong_t)shp->sh_bof); + (void) printf("\t\teof = %llu\n", + (u_longlong_t)shp->sh_eof); + (void) printf("\t\trecords_lost = %llu\n", + (u_longlong_t)shp->sh_records_lost); +} + static void zdb_nicenum(uint64_t num, char *buf) { @@ -869,21 +891,22 @@ dump_history(spa_t *spa) for (i = 0; i < num; i++) { uint64_t time, txg, ievent; char *cmd, *intstr; + boolean_t printed = B_FALSE; if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME, &time) != 0) - continue; + goto next; if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD, &cmd) != 0) { if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_INT_EVENT, &ievent) != 0) - continue; + goto next; verify(nvlist_lookup_uint64(events[i], ZPOOL_HIST_TXG, &txg) == 0); verify(nvlist_lookup_string(events[i], ZPOOL_HIST_INT_STR, &intstr) == 0); - if (ievent >= LOG_END) - continue; + if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) + goto next; (void) snprintf(internalstr, sizeof (internalstr), @@ -896,6 +919,14 @@ dump_history(spa_t *spa) (void) localtime_r(&tsec, &t); (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); (void) printf("%s %s\n", tbuf, cmd); + printed = B_TRUE; + +next: + if (dump_opt['h'] > 1) { + if (!printed) + (void) printf("unrecognized record:\n"); + dump_nvlist(events[i], 2); + } } } @@ -1529,7 +1560,7 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = { dump_zap, /* other ZAP */ dump_zap, /* persistent error log */ dump_uint8, /* SPA history */ - dump_uint64, /* SPA history offsets */ + dump_history_offsets, /* SPA history offsets */ dump_zap, /* Pool properties */ dump_zap, /* DSL permissions */ dump_acl, /* ZFS ACL */ diff --git a/cmd/zfs/Makefile.am b/cmd/zfs/Makefile.am index 8f381f190603..c1499a2a8971 100644 --- a/cmd/zfs/Makefile.am +++ b/cmd/zfs/Makefile.am @@ -16,6 +16,7 @@ zfs_LDADD = \ $(top_builddir)/lib/libnvpair/libnvpair.la \ $(top_builddir)/lib/libuutil/libuutil.la \ $(top_builddir)/lib/libzpool/libzpool.la \ - $(top_builddir)/lib/libzfs/libzfs.la + $(top_builddir)/lib/libzfs/libzfs.la \ + $(top_builddir)/lib/libzfs_core/libzfs_core.la zfs_LDFLAGS = -pthread -lm $(ZLIB) -lrt -ldl $(LIBUUID) $(LIBBLKID) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 5753ccea0d8e..7176c94169b5 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -55,6 +55,7 @@ #include #include +#include #include #include #include @@ -72,6 +73,7 @@ libzfs_handle_t *g_zfs; static FILE *mnttab_file; static char history_str[HIS_MAX_RECORD_LEN]; +static boolean_t log_history = B_TRUE; static int zfs_do_clone(int argc, char **argv); static int zfs_do_create(int argc, char **argv); @@ -261,7 +263,7 @@ get_usage(zfs_help_t idx) return (gettext("\tshare <-a | filesystem>\n")); case HELP_SNAPSHOT: return (gettext("\tsnapshot|snap [-r] [-o property=value] ... 
" - "\n")); + " ...\n")); case HELP_UNMOUNT: return (gettext("\tunmount [-f] " "<-a | filesystem|mountpoint>\n")); @@ -892,9 +894,9 @@ typedef struct destroy_cbdata { nvlist_t *cb_nvl; /* first snap in contiguous run */ - zfs_handle_t *cb_firstsnap; + char *cb_firstsnap; /* previous snap in contiguous run */ - zfs_handle_t *cb_prevsnap; + char *cb_prevsnap; int64_t cb_snapused; char *cb_snapspec; } destroy_cbdata_t; @@ -1008,11 +1010,13 @@ destroy_print_cb(zfs_handle_t *zhp, void *arg) if (nvlist_exists(cb->cb_nvl, name)) { if (cb->cb_firstsnap == NULL) - cb->cb_firstsnap = zfs_handle_dup(zhp); + cb->cb_firstsnap = strdup(name); if (cb->cb_prevsnap != NULL) - zfs_close(cb->cb_prevsnap); + free(cb->cb_prevsnap); /* this snap continues the current range */ - cb->cb_prevsnap = zfs_handle_dup(zhp); + cb->cb_prevsnap = strdup(name); + if (cb->cb_firstsnap == NULL || cb->cb_prevsnap == NULL) + nomem(); if (cb->cb_verbose) { if (cb->cb_parsable) { (void) printf("destroy\t%s\n", name); @@ -1027,12 +1031,12 @@ destroy_print_cb(zfs_handle_t *zhp, void *arg) } else if (cb->cb_firstsnap != NULL) { /* end of this range */ uint64_t used = 0; - err = zfs_get_snapused_int(cb->cb_firstsnap, + err = lzc_snaprange_space(cb->cb_firstsnap, cb->cb_prevsnap, &used); cb->cb_snapused += used; - zfs_close(cb->cb_firstsnap); + free(cb->cb_firstsnap); cb->cb_firstsnap = NULL; - zfs_close(cb->cb_prevsnap); + free(cb->cb_prevsnap); cb->cb_prevsnap = NULL; } zfs_close(zhp); @@ -1049,13 +1053,13 @@ destroy_print_snapshots(zfs_handle_t *fs_zhp, destroy_cbdata_t *cb) if (cb->cb_firstsnap != NULL) { uint64_t used = 0; if (err == 0) { - err = zfs_get_snapused_int(cb->cb_firstsnap, + err = lzc_snaprange_space(cb->cb_firstsnap, cb->cb_prevsnap, &used); } cb->cb_snapused += used; - zfs_close(cb->cb_firstsnap); + free(cb->cb_firstsnap); cb->cb_firstsnap = NULL; - zfs_close(cb->cb_prevsnap); + free(cb->cb_prevsnap); cb->cb_prevsnap = NULL; } return (err); @@ -1908,9 +1912,11 @@ upgrade_set_callback(zfs_handle_t *zhp, void *data) /* * If they did "zfs upgrade -a", then we could * be doing ioctls to different pools. We need - * to log this history once to each pool. + * to log this history once to each pool, and bypass + * the normal history logging that happens in main(). */ - verify(zpool_stage_history(g_zfs, history_str) == 0); + (void) zpool_log_history(g_zfs, history_str); + log_history = B_FALSE; } if (zfs_prop_set(zhp, "version", verstr) == 0) cb->cb_numupgraded++; @@ -3422,6 +3428,32 @@ zfs_do_set(int argc, char **argv) return (ret); } +typedef struct snap_cbdata { + nvlist_t *sd_nvl; + boolean_t sd_recursive; + const char *sd_snapname; +} snap_cbdata_t; + +static int +zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) +{ + snap_cbdata_t *sd = arg; + char *name; + int rv = 0; + int error; + + error = asprintf(&name, "%s@%s", zfs_get_name(zhp), sd->sd_snapname); + if (error == -1) + nomem(); + fnvlist_add_boolean(sd->sd_nvl, name); + free(name); + + if (sd->sd_recursive) + rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); + zfs_close(zhp); + return (rv); +} + /* * zfs snapshot [-r] [-o prop=value] ... 
* @@ -3431,13 +3463,16 @@ zfs_do_set(int argc, char **argv) static int zfs_do_snapshot(int argc, char **argv) { - boolean_t recursive = B_FALSE; int ret = 0; signed char c; nvlist_t *props; + snap_cbdata_t sd = { 0 }; + boolean_t multiple_snaps = B_FALSE; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); + if (nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) != 0) + nomem(); /* check options */ while ((c = getopt(argc, argv, "ro:")) != -1) { @@ -3447,7 +3482,8 @@ zfs_do_snapshot(int argc, char **argv) return (1); break; case 'r': - recursive = B_TRUE; + sd.sd_recursive = B_TRUE; + multiple_snaps = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), @@ -3464,18 +3500,35 @@ zfs_do_snapshot(int argc, char **argv) (void) fprintf(stderr, gettext("missing snapshot argument\n")); goto usage; } - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - goto usage; + + if (argc > 1) + multiple_snaps = B_TRUE; + for (; argc > 0; argc--, argv++) { + char *atp; + zfs_handle_t *zhp; + + atp = strchr(argv[0], '@'); + if (atp == NULL) + goto usage; + *atp = '\0'; + sd.sd_snapname = atp + 1; + zhp = zfs_open(g_zfs, argv[0], + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + goto usage; + if (zfs_snapshot_cb(zhp, &sd) != 0) + goto usage; } - ret = zfs_snapshot(g_zfs, argv[0], recursive, props); + ret = zfs_snapshot_nvl(g_zfs, sd.sd_nvl, props); + nvlist_free(sd.sd_nvl); nvlist_free(props); - if (ret && recursive) + if (ret != 0 && multiple_snaps) (void) fprintf(stderr, gettext("no snapshots were created\n")); return (ret != 0); usage: + nvlist_free(sd.sd_nvl); nvlist_free(props); usage(B_FALSE); return (-1); @@ -6384,8 +6437,7 @@ main(int argc, char **argv) mnttab_file = g_zfs->libzfs_mnttab; - zpool_set_history_str("zfs", argc, argv, history_str); - verify(zpool_stage_history(g_zfs, history_str) == 0); + zfs_save_arguments(argc, argv, history_str, sizeof (history_str)); libzfs_print_on_error(g_zfs, B_TRUE); @@ -6408,6 +6460,9 @@ main(int argc, char **argv) } libzfs_fini(g_zfs); + if (ret == 0 && log_history) + (void) zpool_log_history(g_zfs, history_str); + /* * The 'ZFS_ABORT' environment variable causes us to dump core on exit * for the purposes of running ::findleaks. 
diff --git a/cmd/zhack/zhack.c b/cmd/zhack/zhack.c index b2cf815ca57c..4f80dde6957b 100644 --- a/cmd/zhack/zhack.c +++ b/cmd/zhack/zhack.c @@ -279,6 +279,9 @@ feature_enable_sync(void *arg1, void *arg2, dmu_tx_t *tx) zfeature_info_t *feature = arg2; spa_feature_enable(spa, feature, tx); + spa_history_log_internal(spa, "zhack enable feature", tx, + "name=%s can_readonly=%u", + feature->fi_guid, feature->fi_can_readonly); } static void @@ -356,6 +359,8 @@ feature_incr_sync(void *arg1, void *arg2, dmu_tx_t *tx) zfeature_info_t *feature = arg2; spa_feature_incr(spa, feature, tx); + spa_history_log_internal(spa, "zhack feature incr", tx, + "name=%s", feature->fi_guid); } static void @@ -365,6 +370,8 @@ feature_decr_sync(void *arg1, void *arg2, dmu_tx_t *tx) zfeature_info_t *feature = arg2; spa_feature_decr(spa, feature, tx); + spa_history_log_internal(spa, "zhack feature decr", tx, + "name=%s", feature->fi_guid); } static void diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index b96fbe4ace5b..152f289594d0 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -196,9 +196,9 @@ static zpool_command_t command_table[] = { #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) -zpool_command_t *current_command; +static zpool_command_t *current_command; static char history_str[HIS_MAX_RECORD_LEN]; - +static boolean_t log_history = B_TRUE; static uint_t timestamp_fmt = NODATE; static const char * @@ -1119,7 +1119,10 @@ zpool_do_destroy(int argc, char **argv) return (1); } - ret = (zpool_destroy(zhp) != 0); + /* The history must be logged as part of the export */ + log_history = B_FALSE; + + ret = (zpool_destroy(zhp, history_str) != 0); zpool_close(zhp); @@ -1183,10 +1186,13 @@ zpool_do_export(int argc, char **argv) continue; } + /* The history must be logged as part of the export */ + log_history = B_FALSE; + if (hardforce) { - if (zpool_export_force(zhp) != 0) + if (zpool_export_force(zhp, history_str) != 0) ret = 1; - } else if (zpool_export(zhp, force) != 0) { + } else if (zpool_export(zhp, force, history_str) != 0) { ret = 1; } @@ -4585,13 +4591,6 @@ upgrade_cb(zpool_handle_t *zhp, void *arg) return (ret); printnl = B_TRUE; -#if 0 - /* - * XXX: This code can be enabled when Illumos commit - * 4445fffbbb1ea25fd0e9ea68b9380dd7a6709025 is merged. - * It reworks the history logging among other things. - */ - /* * If they did "zpool upgrade -a", then we could * be doing ioctls to different pools. We need @@ -4600,7 +4599,6 @@ upgrade_cb(zpool_handle_t *zhp, void *arg) */ (void) zpool_log_history(g_zfs, history_str); log_history = B_FALSE; -#endif } if (cbp->cb_version >= SPA_VERSION_FEATURES) { @@ -4700,6 +4698,14 @@ upgrade_list_disabled_cb(zpool_handle_t *zhp, void *arg) (void) printf(gettext(" %s\n"), fname); } + /* + * If they did "zpool upgrade -a", then we could + * be doing ioctls to different pools. We need + * to log this history once to each pool, and bypass + * the normal history logging that happens in main(). 
+ */ + (void) zpool_log_history(g_zfs, history_str); + log_history = B_FALSE; } } @@ -4955,8 +4961,8 @@ zpool_do_upgrade(int argc, char **argv) typedef struct hist_cbdata { boolean_t first; - int longfmt; - int internal; + boolean_t longfmt; + boolean_t internal; } hist_cbdata_t; /* @@ -4968,21 +4974,8 @@ get_history_one(zpool_handle_t *zhp, void *data) nvlist_t *nvhis; nvlist_t **records; uint_t numrecords; - char *cmdstr; - char *pathstr; - uint64_t dst_time; - time_t tsec; - struct tm t; - char tbuf[30]; int ret, i; - uint64_t who; - struct passwd *pwd; - char *hostname; - char *zonename; - char internalstr[MAXPATHLEN]; hist_cbdata_t *cb = (hist_cbdata_t *)data; - uint64_t txg; - uint64_t ievent; cb->first = B_FALSE; @@ -4994,62 +4987,93 @@ get_history_one(zpool_handle_t *zhp, void *data) verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD, &records, &numrecords) == 0); for (i = 0; i < numrecords; i++) { - if (nvlist_lookup_uint64(records[i], ZPOOL_HIST_TIME, - &dst_time) != 0) - continue; + nvlist_t *rec = records[i]; + char tbuf[30] = ""; - /* is it an internal event or a standard event? */ - if (nvlist_lookup_string(records[i], ZPOOL_HIST_CMD, - &cmdstr) != 0) { - if (cb->internal == 0) - continue; + if (nvlist_exists(rec, ZPOOL_HIST_TIME)) { + time_t tsec; + struct tm t; - if (nvlist_lookup_uint64(records[i], - ZPOOL_HIST_INT_EVENT, &ievent) != 0) + tsec = fnvlist_lookup_uint64(records[i], + ZPOOL_HIST_TIME); + (void) localtime_r(&tsec, &t); + (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); + } + + if (nvlist_exists(rec, ZPOOL_HIST_CMD)) { + (void) printf("%s %s", tbuf, + fnvlist_lookup_string(rec, ZPOOL_HIST_CMD)); + } else if (nvlist_exists(rec, ZPOOL_HIST_INT_EVENT)) { + int ievent = + fnvlist_lookup_uint64(rec, ZPOOL_HIST_INT_EVENT); + if (!cb->internal) continue; - verify(nvlist_lookup_uint64(records[i], - ZPOOL_HIST_TXG, &txg) == 0); - verify(nvlist_lookup_string(records[i], - ZPOOL_HIST_INT_STR, &pathstr) == 0); - if (ievent >= LOG_END) + if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) { + (void) printf("%s unrecognized record:\n", + tbuf); + dump_nvlist(rec, 4); continue; - (void) snprintf(internalstr, - sizeof (internalstr), - "[internal %s txg:%llu] %s", - zfs_history_event_names[ievent], (u_longlong_t)txg, - pathstr); - cmdstr = internalstr; + } + (void) printf("%s [internal %s txg:%lld] %s", tbuf, + zfs_history_event_names[ievent], + (long long int)fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG), + fnvlist_lookup_string(rec, ZPOOL_HIST_INT_STR)); + } else if (nvlist_exists(rec, ZPOOL_HIST_INT_NAME)) { + if (!cb->internal) + continue; + (void) printf("%s [txg:%lld] %s", tbuf, + (long long int)fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG), + fnvlist_lookup_string(rec, ZPOOL_HIST_INT_NAME)); + if (nvlist_exists(rec, ZPOOL_HIST_DSNAME)) { + (void) printf(" %s (%llu)", + fnvlist_lookup_string(rec, + ZPOOL_HIST_DSNAME), + (long long unsigned int)fnvlist_lookup_uint64(rec, + ZPOOL_HIST_DSID)); + } + (void) printf(" %s", fnvlist_lookup_string(rec, + ZPOOL_HIST_INT_STR)); + } else if (nvlist_exists(rec, ZPOOL_HIST_IOCTL)) { + if (!cb->internal) + continue; + (void) printf("%s ioctl %s\n", tbuf, + fnvlist_lookup_string(rec, ZPOOL_HIST_IOCTL)); + if (nvlist_exists(rec, ZPOOL_HIST_INPUT_NVL)) { + (void) printf(" input:\n"); + dump_nvlist(fnvlist_lookup_nvlist(rec, + ZPOOL_HIST_INPUT_NVL), 8); + } + if (nvlist_exists(rec, ZPOOL_HIST_OUTPUT_NVL)) { + (void) printf(" output:\n"); + dump_nvlist(fnvlist_lookup_nvlist(rec, + ZPOOL_HIST_OUTPUT_NVL), 8); + } + } else { + if (!cb->internal) + 
continue; + (void) printf("%s unrecognized record:\n", tbuf); + dump_nvlist(rec, 4); } - tsec = dst_time; - (void) localtime_r(&tsec, &t); - (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); - (void) printf("%s %s", tbuf, cmdstr); if (!cb->longfmt) { (void) printf("\n"); continue; } (void) printf(" ["); - if (nvlist_lookup_uint64(records[i], - ZPOOL_HIST_WHO, &who) == 0) { - pwd = getpwuid((uid_t)who); - if (pwd) - (void) printf("user %s on", - pwd->pw_name); - else - (void) printf("user %d on", - (int)who); - } else { - (void) printf(gettext("no info]\n")); - continue; + if (nvlist_exists(rec, ZPOOL_HIST_WHO)) { + uid_t who = fnvlist_lookup_uint64(rec, ZPOOL_HIST_WHO); + struct passwd *pwd = getpwuid(who); + (void) printf("user %d ", (int)who); + if (pwd != NULL) + (void) printf("(%s) ", pwd->pw_name); } - if (nvlist_lookup_string(records[i], - ZPOOL_HIST_HOST, &hostname) == 0) { - (void) printf(" %s", hostname); + if (nvlist_exists(rec, ZPOOL_HIST_HOST)) { + (void) printf("on %s", + fnvlist_lookup_string(rec, ZPOOL_HIST_HOST)); } - if (nvlist_lookup_string(records[i], - ZPOOL_HIST_ZONE, &zonename) == 0) { - (void) printf(":%s", zonename); + if (nvlist_exists(rec, ZPOOL_HIST_ZONE)) { + (void) printf(":%s", + fnvlist_lookup_string(rec, ZPOOL_HIST_ZONE)); } (void) printf("]"); @@ -5066,8 +5090,6 @@ get_history_one(zpool_handle_t *zhp, void *data) * * Displays the history of commands that modified pools. */ - - int zpool_do_history(int argc, char **argv) { @@ -5080,10 +5102,10 @@ zpool_do_history(int argc, char **argv) while ((c = getopt(argc, argv, "li")) != -1) { switch (c) { case 'l': - cbdata.longfmt = 1; + cbdata.longfmt = B_TRUE; break; case 'i': - cbdata.internal = 1; + cbdata.internal = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), @@ -5637,8 +5659,7 @@ main(int argc, char **argv) libzfs_print_on_error(g_zfs, B_TRUE); - zpool_set_history_str("zpool", argc, argv, history_str); - verify(zpool_stage_history(g_zfs, history_str) == 0); + zfs_save_arguments(argc, argv, history_str, sizeof (history_str)); /* * Run the appropriate command. 
@@ -5666,6 +5687,9 @@ main(int argc, char **argv) ret = 1; } + if (ret == 0 && log_history) + (void) zpool_log_history(g_zfs, history_str); + libzfs_fini(g_zfs); /* diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 93a5f1ecf54a..28570a09075c 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -2334,7 +2334,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) */ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); VERIFY3U(ENOENT, ==, - spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL)); + spa_create("ztest_bad_file", nvroot, NULL, NULL)); nvlist_free(nvroot); /* @@ -2342,7 +2342,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) */ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1); VERIFY3U(ENOENT, ==, - spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL)); + spa_create("ztest_bad_mirror", nvroot, NULL, NULL)); nvlist_free(nvroot); /* @@ -2351,7 +2351,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) */ (void) rw_enter(&ztest_name_lock, RW_READER); nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); - VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL)); + VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL)); nvlist_free(nvroot); VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); @@ -2409,7 +2409,7 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) props = fnvlist_alloc(); fnvlist_add_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), version); - VERIFY3S(spa_create(name, nvroot, props, NULL, NULL), ==, 0); + VERIFY3S(spa_create(name, nvroot, props, NULL), ==, 0); fnvlist_free(nvroot); fnvlist_free(props); @@ -3222,8 +3222,7 @@ ztest_snapshot_create(char *osname, uint64_t id) (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, (u_longlong_t)id); - error = dmu_objset_snapshot(osname, strchr(snapname, '@') + 1, - NULL, NULL, B_FALSE, B_FALSE, -1); + error = dmu_objset_snapshot_one(osname, strchr(snapname, '@') + 1); if (error == ENOSPC) { ztest_record_enospc(FTAG); return (B_FALSE); @@ -3457,8 +3456,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, (u_longlong_t)id); - error = dmu_objset_snapshot(osname, strchr(snap1name, '@')+1, - NULL, NULL, B_FALSE, B_FALSE, -1); + error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1); if (error && error != EEXIST) { if (error == ENOSPC) { ztest_record_enospc(FTAG); @@ -3481,8 +3479,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_create(%s) = %d", clone1name, error); } - error = dmu_objset_snapshot(clone1name, strchr(snap2name, '@')+1, - NULL, NULL, B_FALSE, B_FALSE, -1); + error = dmu_objset_snapshot_one(clone1name, strchr(snap2name, '@') + 1); if (error && error != EEXIST) { if (error == ENOSPC) { ztest_record_enospc(FTAG); @@ -3491,8 +3488,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error); } - error = dmu_objset_snapshot(clone1name, strchr(snap3name, '@')+1, - NULL, NULL, B_FALSE, B_FALSE, -1); + error = dmu_objset_snapshot_one(clone1name, strchr(snap3name, '@') + 1); if (error && error != EEXIST) { if (error == ENOSPC) { ztest_record_enospc(FTAG); @@ -4749,8 +4745,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) * Create snapshot, clone it, mark snap for deferred destroy, * destroy clone, verify snap was also destroyed. 
*/ - error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE, - FALSE, -1); + error = dmu_objset_snapshot_one(osname, snapname); if (error) { if (error == ENOSPC) { ztest_record_enospc("dmu_objset_snapshot"); @@ -4792,8 +4787,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) * destroy a held snapshot, mark for deferred destroy, * release hold, verify snapshot was destroyed. */ - error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE, - FALSE, -1); + error = dmu_objset_snapshot_one(osname, snapname); if (error) { if (error == ENOSPC) { ztest_record_enospc("dmu_objset_snapshot"); @@ -5938,8 +5932,7 @@ ztest_init(ztest_shared_t *zs) VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0)); free(buf); } - VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, - NULL, NULL)); + VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL)); nvlist_free(nvroot); VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); diff --git a/configure.ac b/configure.ac index 58e21587051a..08b807e3f16c 100644 --- a/configure.ac +++ b/configure.ac @@ -88,6 +88,7 @@ AC_CONFIG_FILES([ lib/libuutil/Makefile lib/libzpool/Makefile lib/libzfs/Makefile + lib/libzfs_core/Makefile lib/libshare/Makefile cmd/Makefile cmd/zdb/Makefile diff --git a/include/Makefile.am b/include/Makefile.am index 64141d9bce30..2e1c31aaf81c 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -18,6 +18,7 @@ USER_H = \ $(top_srcdir)/include/libuutil.h \ $(top_srcdir)/include/libuutil_impl.h \ $(top_srcdir)/include/libzfs.h \ + $(top_srcdir)/include/libzfs_core.h \ $(top_srcdir)/include/libzfs_impl.h EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) diff --git a/include/libzfs.h b/include/libzfs.h index 3472b7699360..d51a71d7598b 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -63,7 +63,8 @@ extern char *zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE]; /* * libzfs errors */ -enum { +typedef enum zfs_error { + EZFS_SUCCESS = 0, /* no error -- success */ EZFS_NOMEM = 2000, /* out of memory */ EZFS_BADPROP, /* invalid property value */ EZFS_PROPREADONLY, /* cannot set readonly property */ @@ -135,7 +136,7 @@ enum { EZFS_DIFFDATA, /* bad zfs diff data */ EZFS_POOLREADONLY, /* pool is in read-only mode */ EZFS_UNKNOWN -}; +} zfs_error_t; /* * The following data structures are all part @@ -191,6 +192,9 @@ extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *); extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t); +extern void zfs_save_arguments(int argc, char **, char *, int); +extern int zpool_log_history(libzfs_handle_t *, const char *); + extern int libzfs_errno(libzfs_handle_t *); extern const char *libzfs_error_action(libzfs_handle_t *); extern const char *libzfs_error_description(libzfs_handle_t *); @@ -226,7 +230,7 @@ extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *); */ extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *, nvlist_t *, nvlist_t *); -extern int zpool_destroy(zpool_handle_t *); +extern int zpool_destroy(zpool_handle_t *, const char *); extern int zpool_add(zpool_handle_t *, nvlist_t *); typedef struct splitflags { @@ -350,8 +354,8 @@ extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **); /* * Import and export functions */ -extern int zpool_export(zpool_handle_t *, boolean_t); -extern int zpool_export_force(zpool_handle_t *); +extern int zpool_export(zpool_handle_t *, boolean_t, const char *); +extern int zpool_export_force(zpool_handle_t *, const char *); extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *, char 
*altroot); extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *, @@ -385,7 +389,7 @@ extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *, */ struct zfs_cmd; -extern const char *zfs_history_event_names[LOG_END]; +extern const char *zfs_history_event_names[]; extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *, boolean_t verbose); @@ -393,9 +397,6 @@ extern int zpool_upgrade(zpool_handle_t *, uint64_t); extern int zpool_get_history(zpool_handle_t *, nvlist_t **); extern int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***, uint_t *); -extern void zpool_set_history_str(const char *subcommand, int argc, - char **argv, char *history_str); -extern int zpool_stage_history(libzfs_handle_t *, const char *); extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, int, int); extern int zpool_events_clear(libzfs_handle_t *, int *); extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, @@ -450,8 +451,6 @@ extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal); extern int zfs_prop_get_feature(zfs_handle_t *zhp, const char *propname, char *buf, size_t len); -extern int zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap, - uint64_t *usedp); extern uint64_t getprop_uint64(zfs_handle_t *, zfs_prop_t, char **); extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t); extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t); @@ -567,6 +566,8 @@ extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t); extern int zfs_destroy_snaps_nvl(zfs_handle_t *, nvlist_t *, boolean_t); extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *); extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *); +extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, + nvlist_t *props); extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t); extern int zfs_rename(zfs_handle_t *, const char *, boolean_t, boolean_t); diff --git a/include/libzfs_core.h b/include/libzfs_core.h new file mode 100644 index 000000000000..9edc884a14d1 --- /dev/null +++ b/include/libzfs_core.h @@ -0,0 +1,62 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. 
+ */ + +#ifndef _LIBZFS_CORE_H +#define _LIBZFS_CORE_H + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int libzfs_core_init(void); +void libzfs_core_fini(void); + +int lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist); +int lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props); +int lzc_clone(const char *fsname, const char *origin, nvlist_t *props); +int lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist); + +int lzc_snaprange_space(const char *firstsnap, const char *lastsnap, + uint64_t *usedp); + +int lzc_send(const char *snapname, const char *fromsnap, int fd); +int lzc_receive(const char *snapname, nvlist_t *props, const char *origin, + boolean_t force, int fd); +int lzc_send_space(const char *snapname, const char *fromsnap, + uint64_t *result); + +boolean_t lzc_exists(const char *dataset); + + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBZFS_CORE_H */ diff --git a/include/libzfs_impl.h b/include/libzfs_impl.h index fabcb1183144..7daf8348b228 100644 --- a/include/libzfs_impl.h +++ b/include/libzfs_impl.h @@ -21,11 +21,11 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ -#ifndef _LIBFS_IMPL_H -#define _LIBFS_IMPL_H +#ifndef _LIBZFS_IMPL_H +#define _LIBZFS_IMPL_H #include #include @@ -36,6 +36,7 @@ #include #include #include +#include #if defined(HAVE_LIBTOPO) #include @@ -69,7 +70,6 @@ struct libzfs_handle { int libzfs_desc_active; char libzfs_action[1024]; char libzfs_desc[1024]; - char *libzfs_log_str; int libzfs_printerr; int libzfs_storeerr; /* stuff error messages into buffer */ void *libzfs_sharehdl; /* libshare handle */ @@ -219,4 +219,4 @@ extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t); } #endif -#endif /* _LIBFS_IMPL_H */ +#endif /* _LIBZFS_IMPL_H */ diff --git a/include/sys/dmu.h b/include/sys/dmu.h index adaab4c6bb61..c50df391ecb3 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -42,6 +42,7 @@ #include #include #include +#include #include #ifdef __cplusplus @@ -213,16 +214,6 @@ typedef enum dmu_object_type { DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE), } dmu_object_type_t; -typedef enum dmu_objset_type { - DMU_OST_NONE, - DMU_OST_META, - DMU_OST_ZFS, - DMU_OST_ZVOL, - DMU_OST_OTHER, /* For testing only! */ - DMU_OST_ANY, /* Be careful! 
*/ - DMU_OST_NUMTYPES -} dmu_objset_type_t; - void byteswap_uint64_array(void *buf, size_t size); void byteswap_uint32_array(void *buf, size_t size); void byteswap_uint16_array(void *buf, size_t size); @@ -267,9 +258,11 @@ int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, uint64_t flags); int dmu_objset_destroy(const char *name, boolean_t defer); -int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer, char *); -int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, - struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd); +int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer, + struct nvlist *errlist); +int dmu_objset_snapshot(struct nvlist *snaps, struct nvlist *, struct nvlist *); +int dmu_objset_snapshot_one(const char *fsname, const char *snapname); +int dmu_objset_snapshot_tmp(const char *, const char *, int); int dmu_objset_rename(const char *name, const char *newname, boolean_t recursive); int dmu_objset_find(char *name, int func(const char *, void *), void *arg, @@ -792,10 +785,9 @@ typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp, void dmu_traverse_objset(objset_t *os, uint64_t txg_start, dmu_traverse_cb_t cb, void *arg); -int dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, +int dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, struct vnode *vp, offset_t *off); -int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorign, - uint64_t *sizep); +int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep); typedef struct dmu_recv_cookie { /* diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index 507f73222895..79d3a6bc05a9 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ /* Portions Copyright 2010 Robert Milkowski */ @@ -140,24 +141,14 @@ void dmu_objset_rele(objset_t *os, void *tag); void dmu_objset_disown(objset_t *os, void *tag); int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp); -int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, - void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); -int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, - uint64_t flags); -int dmu_objset_destroy(const char *name, boolean_t defer); -int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, - struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd); void dmu_objset_stats(objset_t *os, nvlist_t *nv); void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat); void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp); uint64_t dmu_objset_fsid_guid(objset_t *os); -int dmu_objset_find(char *name, int func(const char *, void *), void *arg, - int flags); int dmu_objset_find_spa(spa_t *spa, const char *name, int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags); int dmu_objset_prefetch(const char *name, void *arg); -void dmu_objset_byteswap(void *buf, size_t size); int dmu_objset_evict_dbufs(objset_t *os); timestruc_t dmu_objset_snap_cmtime(objset_t *os); diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h index afcf2b7946f0..735ccbbd3058 100644 --- a/include/sys/dsl_dataset.h +++ b/include/sys/dsl_dataset.h @@ -180,7 +180,7 @@ struct dsl_ds_destroyarg { struct dsl_ds_holdarg { dsl_sync_task_group_t *dstg; - char *htag; + const char *htag; char *snapname; boolean_t recursive; boolean_t gotone; @@ -215,12 +215,11 @@ uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, uint64_t flags, dmu_tx_t *tx); int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer); -int dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer); dsl_checkfunc_t dsl_dataset_destroy_check; dsl_syncfunc_t dsl_dataset_destroy_sync; -dsl_checkfunc_t dsl_dataset_snapshot_check; -dsl_syncfunc_t dsl_dataset_snapshot_sync; dsl_syncfunc_t dsl_dataset_user_hold_sync; +int dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *, dmu_tx_t *tx); +void dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *, dmu_tx_t *tx); int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive); int dsl_dataset_promote(const char *name, char *conflsnap); int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, diff --git a/include/sys/dsl_deleg.h b/include/sys/dsl_deleg.h index 9db6d07e87e7..5842639aafba 100644 --- a/include/sys/dsl_deleg.h +++ b/include/sys/dsl_deleg.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #ifndef _SYS_DSL_DELEG_H @@ -65,8 +65,7 @@ extern "C" { int dsl_deleg_get(const char *ddname, nvlist_t **nvp); int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset); int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr); -int dsl_deleg_access_impl(struct dsl_dataset *ds, boolean_t descendent, - const char *perm, cred_t *cr); +int dsl_deleg_access_impl(struct dsl_dataset *ds, const char *perm, cred_t *cr); void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr); int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr); int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr); diff --git a/include/sys/dsl_prop.h b/include/sys/dsl_prop.h index a636ad35096b..b0d9a52cdfd7 100644 --- a/include/sys/dsl_prop.h +++ b/include/sys/dsl_prop.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_PROP_H @@ -89,8 +90,6 @@ dsl_syncfunc_t dsl_props_set_sync; int dsl_prop_set(const char *ddname, const char *propname, zprop_source_t source, int intsz, int numints, const void *buf); int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl); -void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val, - dmu_tx_t *tx); void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname, zprop_source_t source, uint64_t *value); diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 26c24fc003aa..164bf3591cb6 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -52,6 +52,16 @@ typedef enum { ZFS_TYPE_POOL = 0x8 } zfs_type_t; +typedef enum dmu_objset_type { + DMU_OST_NONE, + DMU_OST_META, + DMU_OST_ZFS, + DMU_OST_ZVOL, + DMU_OST_OTHER, /* For testing only! */ + DMU_OST_ANY, /* Be careful! */ + DMU_OST_NUMTYPES +} dmu_objset_type_t; + #define ZFS_TYPE_DATASET \ (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT) @@ -753,10 +763,10 @@ typedef struct ddt_histogram { /* * /dev/zfs ioctl numbers. */ -#define ZFS_IOC ('Z' << 8) - typedef enum zfs_ioc { - ZFS_IOC_POOL_CREATE = ZFS_IOC, + ZFS_IOC_FIRST = ('Z' << 8), + ZFS_IOC = ZFS_IOC_FIRST, + ZFS_IOC_POOL_CREATE = ZFS_IOC_FIRST, ZFS_IOC_POOL_DESTROY, ZFS_IOC_POOL_IMPORT, ZFS_IOC_POOL_EXPORT, @@ -793,7 +803,7 @@ typedef enum zfs_ioc { ZFS_IOC_ERROR_LOG, ZFS_IOC_CLEAR, ZFS_IOC_PROMOTE, - ZFS_IOC_DESTROY_SNAPS_NVL, + ZFS_IOC_DESTROY_SNAPS, ZFS_IOC_SNAPSHOT, ZFS_IOC_DSOBJ_TO_DSNAME, ZFS_IOC_OBJ_TO_PATH, @@ -823,6 +833,11 @@ typedef enum zfs_ioc { ZFS_IOC_SPACE_SNAPS, ZFS_IOC_POOL_REOPEN, ZFS_IOC_SEND_PROGRESS, + ZFS_IOC_LOG_HISTORY, + ZFS_IOC_SEND_NEW, + ZFS_IOC_SEND_SPACE, + ZFS_IOC_CLONE, + ZFS_IOC_LAST } zfs_ioc_t; /* @@ -864,6 +879,12 @@ typedef enum { #define ZPOOL_HIST_TXG "history txg" #define ZPOOL_HIST_INT_EVENT "history internal event" #define ZPOOL_HIST_INT_STR "history internal str" +#define ZPOOL_HIST_INT_NAME "internal_name" +#define ZPOOL_HIST_IOCTL "ioctl" +#define ZPOOL_HIST_INPUT_NVL "in_nvl" +#define ZPOOL_HIST_OUTPUT_NVL "out_nvl" +#define ZPOOL_HIST_DSNAME "dsname" +#define ZPOOL_HIST_DSID "dsid" /* * Flags for ZFS_IOC_VDEV_SET_STATE @@ -909,57 +930,6 @@ typedef enum { #define ZFS_EV_VDEV_PATH "vdev_path" #define ZFS_EV_VDEV_GUID "vdev_guid" -/* - * Note: This is encoded on-disk, so new events must be added to the - * end, and unused events can not be removed. Be sure to edit - * libzfs_pool.c: hist_event_table[]. 
- */ -typedef enum history_internal_events { - LOG_NO_EVENT = 0, - LOG_POOL_CREATE, - LOG_POOL_VDEV_ADD, - LOG_POOL_REMOVE, - LOG_POOL_DESTROY, - LOG_POOL_EXPORT, - LOG_POOL_IMPORT, - LOG_POOL_VDEV_ATTACH, - LOG_POOL_VDEV_REPLACE, - LOG_POOL_VDEV_DETACH, - LOG_POOL_VDEV_ONLINE, - LOG_POOL_VDEV_OFFLINE, - LOG_POOL_UPGRADE, - LOG_POOL_CLEAR, - LOG_POOL_SCAN, - LOG_POOL_PROPSET, - LOG_DS_CREATE, - LOG_DS_CLONE, - LOG_DS_DESTROY, - LOG_DS_DESTROY_BEGIN, - LOG_DS_INHERIT, - LOG_DS_PROPSET, - LOG_DS_QUOTA, - LOG_DS_PERM_UPDATE, - LOG_DS_PERM_REMOVE, - LOG_DS_PERM_WHO_REMOVE, - LOG_DS_PROMOTE, - LOG_DS_RECEIVE, - LOG_DS_RENAME, - LOG_DS_RESERVATION, - LOG_DS_REPLAY_INC_SYNC, - LOG_DS_REPLAY_FULL_SYNC, - LOG_DS_ROLLBACK, - LOG_DS_SNAPSHOT, - LOG_DS_UPGRADE, - LOG_DS_REFQUOTA, - LOG_DS_REFRESERV, - LOG_POOL_SCAN_DONE, - LOG_DS_USER_HOLD, - LOG_DS_USER_RELEASE, - LOG_POOL_SPLIT, - LOG_POOL_GUID_CHANGE, - LOG_END -} history_internal_events_t; - #ifdef __cplusplus } #endif diff --git a/include/sys/rrwlock.h b/include/sys/rrwlock.h index 798a015d19e2..8fde3a3beb58 100644 --- a/include/sys/rrwlock.h +++ b/include/sys/rrwlock.h @@ -22,6 +22,9 @@ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ #ifndef _SYS_RR_RW_LOCK_H #define _SYS_RR_RW_LOCK_H @@ -69,6 +72,7 @@ void rrw_destroy(rrwlock_t *rrl); void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag); void rrw_exit(rrwlock_t *rrl, void *tag); boolean_t rrw_held(rrwlock_t *rrl, krw_t rw); +void rrw_tsd_destroy(void *arg); #define RRW_READ_HELD(x) rrw_held(x, RW_READER) #define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER) diff --git a/include/sys/spa.h b/include/sys/spa.h index 8f2af8af2b70..1af9137f8d00 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -52,6 +52,7 @@ typedef struct spa_aux_vdev spa_aux_vdev_t; typedef struct ddt ddt_t; typedef struct ddt_entry ddt_entry_t; struct dsl_pool; +struct dsl_dataset; /* * General-purpose 32-bit and 64-bit bitfield encodings. 
@@ -419,7 +420,7 @@ extern int spa_open_rewind(const char *pool, spa_t **, void *tag, extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot, size_t buflen); extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props, - const char *history_str, nvlist_t *zplprops); + nvlist_t *zplprops); extern int spa_import_rootpool(char *devpath, char *devid); extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags); @@ -629,31 +630,20 @@ extern boolean_t spa_writeable(spa_t *spa); extern int spa_mode(spa_t *spa); extern uint64_t strtonum(const char *str, char **nptr); -/* history logging */ -typedef enum history_log_type { - LOG_CMD_POOL_CREATE, - LOG_CMD_NORMAL, - LOG_INTERNAL -} history_log_type_t; - -typedef struct history_arg { - char *ha_history_str; - history_log_type_t ha_log_type; - history_internal_events_t ha_event; - char *ha_zone; - uid_t ha_uid; -} history_arg_t; - extern char *spa_his_ievent_table[]; extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx); extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read, char *his_buf); -extern int spa_history_log(spa_t *spa, const char *his_buf, - history_log_type_t what); -extern void spa_history_log_internal(history_internal_events_t event, - spa_t *spa, dmu_tx_t *tx, const char *fmt, ...); -extern void spa_history_log_version(spa_t *spa, history_internal_events_t evt); +extern int spa_history_log(spa_t *spa, const char *his_buf); +extern int spa_history_log_nvl(spa_t *spa, nvlist_t *nvl); +extern void spa_history_log_version(spa_t *spa, const char *operation); +extern void spa_history_log_internal(spa_t *spa, const char *operation, + dmu_tx_t *tx, const char *fmt, ...); +extern void spa_history_log_internal_ds(struct dsl_dataset *ds, const char *op, + dmu_tx_t *tx, const char *fmt, ...); +extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, + dmu_tx_t *tx, const char *fmt, ...); /* error handling */ struct zbookmark; diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 4b342595f335..599b97a9b6c9 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -293,6 +293,7 @@ extern void rw_exit(krwlock_t *rwlp); #define rw_downgrade(rwlp) do { } while (0) extern uid_t crgetuid(cred_t *cr); +extern uid_t crgetruid(cred_t *cr); extern gid_t crgetgid(cred_t *cr); extern int crgetngroups(cred_t *cr); extern gid_t *crgetgroups(cred_t *cr); diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index c0cb4705d487..21bfe2b8f9b0 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_ZFS_IOCTL_H @@ -40,6 +41,15 @@ extern "C" { #endif +/* + * The structures in this file are passed between userland and the + * kernel. Userland may be running a 32-bit process, while the kernel + * is 64-bit. Therefore, these structures need to compile the same in + * 32-bit and 64-bit. This means not using type "long", and adding + * explicit padding so that the 32-bit structure will not be packed more + * tightly than the 64-bit structure (which requires 64-bit alignment). 
+ */ + /* * Property values for snapdir */ @@ -277,22 +287,29 @@ typedef enum zfs_case { } zfs_case_t; typedef struct zfs_cmd { - char zc_name[MAXPATHLEN]; + char zc_name[MAXPATHLEN]; /* name of pool or dataset */ + uint64_t zc_nvlist_src; /* really (char *) */ + uint64_t zc_nvlist_src_size; + uint64_t zc_nvlist_dst; /* really (char *) */ + uint64_t zc_nvlist_dst_size; + boolean_t zc_nvlist_dst_filled; /* put an nvlist in dst? */ + int zc_pad2; + + /* + * The following members are for legacy ioctls which haven't been + * converted to the new method. + */ + uint64_t zc_history; /* really (char *) */ char zc_value[MAXPATHLEN * 2]; char zc_string[MAXNAMELEN]; char zc_top_ds[MAXPATHLEN]; uint64_t zc_guid; uint64_t zc_nvlist_conf; /* really (char *) */ uint64_t zc_nvlist_conf_size; - uint64_t zc_nvlist_src; /* really (char *) */ - uint64_t zc_nvlist_src_size; - uint64_t zc_nvlist_dst; /* really (char *) */ - uint64_t zc_nvlist_dst_size; uint64_t zc_cookie; uint64_t zc_objset_type; uint64_t zc_perm_action; - uint64_t zc_history; /* really (char *) */ - uint64_t zc_history_len; + uint64_t zc_history_len; uint64_t zc_history_offset; uint64_t zc_obj; uint64_t zc_iflags; /* internal to zfs(7fs) */ diff --git a/include/zfs_comutil.h b/include/zfs_comutil.h index 61327f9aa909..f89054388a4d 100644 --- a/include/zfs_comutil.h +++ b/include/zfs_comutil.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _ZFS_COMUTIL_H @@ -37,7 +38,8 @@ extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *); extern int zfs_zpl_version_map(int spa_version); extern int zfs_spa_version_map(int zpl_version); -extern const char *zfs_history_event_names[LOG_END]; +#define ZFS_NUM_LEGACY_HISTORY_EVENTS 41 +extern const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS]; #ifdef __cplusplus } diff --git a/lib/Makefile.am b/lib/Makefile.am index 09139d58c40a..8e7caf2a16e9 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -4,4 +4,4 @@ SUBDIRS = libspl libavl libefi libshare libunicode # These four libraries, which are installed as the final build product, # incorporate the five convenience libraries given above. 
-SUBDIRS += libuutil libnvpair libzpool libzfs +SUBDIRS += libuutil libnvpair libzpool libzfs_core libzfs diff --git a/lib/libzfs/Makefile.am b/lib/libzfs/Makefile.am index 524efaa601f7..14abd7ab3cb8 100644 --- a/lib/libzfs/Makefile.am +++ b/lib/libzfs/Makefile.am @@ -22,6 +22,7 @@ libzfs_la_SOURCES = \ $(top_srcdir)/lib/libzfs/libzfs_util.c libzfs_la_LIBADD = \ + $(top_builddir)/lib/libzfs_core/libzfs_core.la \ $(top_builddir)/lib/libshare/libshare.la \ $(top_builddir)/lib/libnvpair/libnvpair.la \ $(top_builddir)/lib/libzpool/libzpool.la diff --git a/lib/libzfs/libzfs_config.c b/lib/libzfs/libzfs_config.c index ee94fe106d73..99b6e67c37c2 100644 --- a/lib/libzfs/libzfs_config.c +++ b/lib/libzfs/libzfs_config.c @@ -106,7 +106,7 @@ namespace_reload(libzfs_handle_t *hdl) nvlist_t *config; config_node_t *cn; nvpair_t *elem; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; void *cookie; if (hdl->libzfs_ns_gen == 0) { @@ -261,7 +261,7 @@ zpool_get_features(zpool_handle_t *zhp) int zpool_refresh_stats(zpool_handle_t *zhp, boolean_t *missing) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int error; nvlist_t *config; libzfs_handle_t *hdl = zhp->zpool_hdl; diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 244b687eddd5..041750bca8c3 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -313,7 +313,7 @@ get_recvd_props_ioctl(zfs_handle_t *zhp) { libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *recvdprops; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int err; if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) @@ -376,7 +376,7 @@ static int get_stats(zfs_handle_t *zhp) { int rc = 0; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) return (-1); @@ -439,7 +439,7 @@ make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc) zfs_handle_t * make_dataset_handle(libzfs_handle_t *hdl, const char *path) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); @@ -1427,15 +1427,14 @@ zfs_is_namespace_prop(zfs_prop_t prop) int zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int ret = -1; prop_changelist_t *cl = NULL; char errbuf[1024]; libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *nvl = NULL, *realprops; zfs_prop_t prop; - boolean_t do_prefix; - uint64_t idx; + boolean_t do_prefix = B_TRUE; int added_resv = 0; (void) snprintf(errbuf, sizeof (errbuf), @@ -1474,12 +1473,17 @@ zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval) } /* - * If the dataset's canmount property is being set to noauto, - * then we want to prevent unmounting & remounting it. + * We don't want to unmount & remount the dataset when changing + * its canmount property to 'on' or 'noauto'. We only use + * the changelist logic to unmount when setting canmount=off. 
*/ - do_prefix = !((prop == ZFS_PROP_CANMOUNT) && - (zprop_string_to_index(prop, propval, &idx, - ZFS_TYPE_DATASET) == 0) && (idx == ZFS_CANMOUNT_NOAUTO)); + if (prop == ZFS_PROP_CANMOUNT) { + uint64_t idx; + int err = zprop_string_to_index(prop, propval, &idx, + ZFS_TYPE_DATASET); + if (err == 0 && idx != ZFS_CANMOUNT_OFF) + do_prefix = B_FALSE; + } if (do_prefix && (ret = changelist_prefix(cl)) != 0) goto error; @@ -1549,7 +1553,7 @@ zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval) int zfs_prop_inherit(zfs_handle_t *zhp, const char *propname, boolean_t received) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int ret; prop_changelist_t *cl; libzfs_handle_t *hdl = zhp->zfs_hdl; @@ -1724,7 +1728,7 @@ static int get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, char **source, uint64_t *val) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; nvlist_t *zplprops = NULL; struct mnttab mnt; char *mntopt_on = NULL; @@ -2576,7 +2580,7 @@ zfs_prop_get_userquota_common(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue, zfs_userquota_prop_t *typep) { int err; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); @@ -2636,7 +2640,7 @@ zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue) { int err; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; const char *snapname; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); @@ -2686,25 +2690,6 @@ zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, return (0); } -int -zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap, - uint64_t *usedp) -{ - int err; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; - - (void) strlcpy(zc.zc_name, lastsnap->zfs_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, firstsnap->zfs_name, sizeof (zc.zc_value)); - - err = ioctl(lastsnap->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_SNAPS, &zc); - if (err) - return (err); - - *usedp = zc.zc_cookie; - - return (0); -} - /* * Returns the name of the given zfs handle. 
*/ @@ -2775,7 +2760,7 @@ static int check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned, boolean_t accept_ancestor, int *prefixlen) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char parent[ZFS_MAXNAMELEN]; char *slash; zfs_handle_t *zhp; @@ -2905,7 +2890,6 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) */ for (cp = target + prefixlen + 1; (cp = strchr(cp, '/')); *cp = '/', cp++) { - char *logstr; *cp = '\0'; @@ -2916,16 +2900,12 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) continue; } - logstr = hdl->libzfs_log_str; - hdl->libzfs_log_str = NULL; if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM, NULL) != 0) { - hdl->libzfs_log_str = logstr; opname = dgettext(TEXT_DOMAIN, "create"); goto ancestorerr; } - hdl->libzfs_log_str = logstr; h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); if (h == NULL) { opname = dgettext(TEXT_DOMAIN, "open"); @@ -2983,12 +2963,12 @@ int zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, nvlist_t *props) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int ret; uint64_t size = 0; uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); char errbuf[1024]; uint64_t zoned; + dmu_objset_type_t ost; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); @@ -3008,17 +2988,16 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, * will return ENOENT, not EEXIST. To prevent this from happening, we * first try to see if the dataset exists. */ - (void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name)); - if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { + if (zfs_dataset_exists(hdl, path, ZFS_TYPE_DATASET)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset already exists")); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } if (type == ZFS_TYPE_VOLUME) - zc.zc_objset_type = DMU_OST_ZVOL; + ost = DMU_OST_ZVOL; else - zc.zc_objset_type = DMU_OST_ZFS; + ost = DMU_OST_ZFS; if (props && (props = zfs_valid_proplist(hdl, type, props, zoned, NULL, errbuf)) == 0) @@ -3070,12 +3049,9 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, } } - if (props && zcmd_write_src_nvlist(hdl, &zc, props) != 0) - return (-1); - nvlist_free(props); - /* create the dataset */ - ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc); + ret = lzc_create(path, ost, props); + nvlist_free(props); if (ret == 0 && type == ZFS_TYPE_VOLUME) { ret = zvol_create_link(hdl, path); @@ -3084,13 +3060,10 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, dgettext(TEXT_DOMAIN, "Volume successfully created, but device links " "were not created")); - zcmd_free_nvlists(&zc); return (-1); } } - zcmd_free_nvlists(&zc); - /* check for failure */ if (ret != 0) { char parent[ZFS_MAXNAMELEN]; @@ -3147,7 +3120,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, int zfs_destroy(zfs_handle_t *zhp, boolean_t defer) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); @@ -3241,33 +3214,36 @@ int zfs_destroy_snaps_nvl(zfs_handle_t *zhp, nvlist_t *snaps, boolean_t defer) { int ret; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + nvlist_t *errlist; - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - if (zcmd_write_src_nvlist(zhp->zfs_hdl, &zc, snaps) != 0) - return (-1); - zc.zc_defer_destroy = defer; + ret = 
lzc_destroy_snaps(snaps, defer, &errlist); - ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY_SNAPS_NVL, &zc); if (ret != 0) { - char errbuf[1024]; - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot destroy snapshots in %s"), zc.zc_name); - - switch (errno) { - case EEXIST: - zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, - "snapshot is cloned")); - return (zfs_error(zhp->zfs_hdl, EZFS_EXISTS, errbuf)); + nvpair_t *pair; + for (pair = nvlist_next_nvpair(errlist, NULL); + pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"), + nvpair_name(pair)); - default: - return (zfs_standard_error(zhp->zfs_hdl, errno, - errbuf)); + switch (fnvpair_value_int32(pair)) { + case EEXIST: + zfs_error_aux(zhp->zfs_hdl, + dgettext(TEXT_DOMAIN, + "snapshot is cloned")); + ret = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, + errbuf); + break; + default: + ret = zfs_standard_error(zhp->zfs_hdl, errno, + errbuf); + break; + } } } - return (0); + return (ret); } /* @@ -3276,12 +3252,10 @@ zfs_destroy_snaps_nvl(zfs_handle_t *zhp, nvlist_t *snaps, boolean_t defer) int zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; char parent[ZFS_MAXNAMELEN]; int ret; char errbuf[1024]; libzfs_handle_t *hdl = zhp->zfs_hdl; - zfs_type_t type; uint64_t zoned; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); @@ -3300,32 +3274,21 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) (void) parent_name(target, parent, sizeof (parent)); /* do the clone */ - if (ZFS_IS_VOLUME(zhp)) { - zc.zc_objset_type = DMU_OST_ZVOL; - type = ZFS_TYPE_VOLUME; - } else { - zc.zc_objset_type = DMU_OST_ZFS; - type = ZFS_TYPE_FILESYSTEM; - } if (props) { + zfs_type_t type; + if (ZFS_IS_VOLUME(zhp)) { + type = ZFS_TYPE_VOLUME; + } else { + type = ZFS_TYPE_FILESYSTEM; + } if ((props = zfs_valid_proplist(hdl, type, props, zoned, zhp, errbuf)) == NULL) return (-1); - - if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) { - nvlist_free(props); - return (-1); - } - - nvlist_free(props); } - (void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value)); - ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_CREATE, &zc); - - zcmd_free_nvlists(&zc); + ret = lzc_clone(target, zhp->zfs_name, props); + nvlist_free(props); if (ret != 0) { switch (errno) { @@ -3425,7 +3388,7 @@ int zfs_promote(zfs_handle_t *zhp) { libzfs_handle_t *hdl = zhp->zfs_hdl; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char parent[MAXPATHLEN]; char *cp; int ret; @@ -3540,94 +3503,159 @@ zfs_create_link_cb(zfs_handle_t *zhp, void *arg) return (ret); } +typedef struct snapdata { + nvlist_t *sd_nvl; + const char *sd_snapname; +} snapdata_t; + +static int +zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) +{ + snapdata_t *sd = arg; + char name[ZFS_MAXNAMELEN]; + int rv = 0; + + (void) snprintf(name, sizeof (name), + "%s@%s", zfs_get_name(zhp), sd->sd_snapname); + + fnvlist_add_boolean(sd->sd_nvl, name); + + rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); + zfs_close(zhp); + return (rv); +} + /* - * Takes a snapshot of the given dataset. + * Creates snapshots. The keys in the snaps nvlist are the snapshots to be + * created. 
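The comment above describes the new nvlist-driven entry point. A minimal usage sketch of zfs_snapshot_nvl(), assuming an initialized libzfs handle and existing datasets; the pool, dataset and snapshot names are illustrative:

    #include <libzfs.h>
    #include <libnvpair.h>

    /* Atomically snapshot two filesystems of the same pool. */
    static int
    snapshot_two(libzfs_handle_t *hdl)
    {
            nvlist_t *snaps = fnvlist_alloc();
            int err;

            fnvlist_add_boolean(snaps, "tank/home@backup");
            fnvlist_add_boolean(snaps, "tank/data@backup");
            err = zfs_snapshot_nvl(hdl, snaps, NULL);
            fnvlist_free(snaps);
            return (err);
    }

The optional third argument is a properties nvlist; as with the underlying lzc_snapshot() added later in this patch, the kernel currently accepts only user properties there, and they are applied to every snapshot in the request. This is the library support behind the extended command-line syntax documented in the zfs.8 change below, for example "zfs snapshot tank/home@backup tank/data@backup".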
*/ int -zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, - nvlist_t *props) +zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props) { - const char *delim; - char parent[ZFS_MAXNAMELEN]; - zfs_handle_t *zhp; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int ret; char errbuf[1024]; + nvpair_t *elem; + nvlist_t *errors; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot snapshot '%s'"), path); - - /* validate the target name */ - if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE)) - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + "cannot create snapshots ")); - if (props) { - if ((props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, - props, B_FALSE, NULL, errbuf)) == NULL) - return (-1); + elem = NULL; + while ((elem = nvlist_next_nvpair(snaps, elem)) != NULL) { + const char *snapname = nvpair_name(elem); - if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) { - nvlist_free(props); - return (-1); + /* validate the target name */ + if (!zfs_validate_name(hdl, snapname, ZFS_TYPE_SNAPSHOT, + B_TRUE)) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot create snapshot '%s'"), snapname); + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } - - nvlist_free(props); } - /* make sure the parent exists and is of the appropriate type */ - delim = strchr(path, '@'); - (void) strncpy(parent, path, delim - path); - parent[delim - path] = '\0'; - - if ((zhp = zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM | - ZFS_TYPE_VOLUME)) == NULL) { - zcmd_free_nvlists(&zc); + if (props != NULL && + (props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, + props, B_FALSE, NULL, errbuf)) == NULL) { return (-1); } - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, delim+1, sizeof (zc.zc_value)); - if (ZFS_IS_VOLUME(zhp)) - zc.zc_objset_type = DMU_OST_ZVOL; - else - zc.zc_objset_type = DMU_OST_ZFS; - zc.zc_cookie = recursive; - ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SNAPSHOT, &zc); + ret = lzc_snapshot(snaps, props, &errors); - zcmd_free_nvlists(&zc); + if (ret != 0) { + boolean_t printed = B_FALSE; + for (elem = nvlist_next_nvpair(errors, NULL); + elem != NULL; + elem = nvlist_next_nvpair(errors, elem)) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot create snapshot '%s'"), nvpair_name(elem)); + (void) zfs_standard_error(hdl, + fnvpair_value_int32(elem), errbuf); + printed = B_TRUE; + } + if (!printed) { + switch (ret) { + case EXDEV: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "multiple snapshots of same " + "fs not allowed")); + (void) zfs_error(hdl, EZFS_EXISTS, errbuf); - /* - * if it was recursive, the one that actually failed will be in - * zc.zc_name. 
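The error path above separates per-snapshot failures, reported through the errors nvlist, from request-wide failures such as EXDEV, which the message above associates with naming two snapshots of the same filesystem in one request. A hedged sketch of the same pattern at the libzfs_core layer; names are illustrative and libzfs_core_init() is assumed to have been called already:

    #include <stdio.h>
    #include <libnvpair.h>
    #include <libzfs_core.h>

    /* Request a batch of snapshots and report any per-snapshot error codes. */
    static int
    snapshot_with_errors(nvlist_t *snaps)
    {
            nvlist_t *errlist = NULL;
            nvpair_t *pair;
            int err;

            err = lzc_snapshot(snaps, NULL, &errlist);
            for (pair = nvlist_next_nvpair(errlist, NULL); pair != NULL;
                pair = nvlist_next_nvpair(errlist, pair)) {
                    (void) fprintf(stderr, "cannot snapshot %s: error %d\n",
                        nvpair_name(pair), (int)fnvpair_value_int32(pair));
            }
            nvlist_free(errlist);
            return (err);
    }

When the request is rejected as a whole (the EXDEV case, for instance), the errlist may be empty and only the return value is meaningful; that is what the "!printed" fallback in the patch handles.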
- */ - if (ret != 0) - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value); + break; + default: + (void) zfs_standard_error(hdl, ret, errbuf); + } + } + } else { + zfs_handle_t *zhp; + int linktries = 0, linkok = 0, linkfail = 0; + nvpair_t *snap; - if (ret == 0 && recursive) { - struct createdata cd; + for (snap = nvlist_next_nvpair(snaps, NULL); snap != NULL; + snap = nvlist_next_nvpair(snaps, snap)) { + char *cp, *snapname; - cd.cd_snapname = delim + 1; - cd.cd_ifexists = B_FALSE; - (void) zfs_iter_filesystems(zhp, zfs_create_link_cb, &cd); - } - if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) { - ret = zvol_create_link(zhp->zfs_hdl, path); - if (ret != 0) { - (void) zfs_standard_error(hdl, errno, - dgettext(TEXT_DOMAIN, - "Volume successfully snapshotted, but device links " - "were not created")); - zfs_close(zhp); - return (-1); + snapname = nvpair_name(snap); + cp = strchr(snapname, '@'); + *cp = '\0'; + + if ((zhp = zfs_open(hdl, snapname, ZFS_TYPE_FILESYSTEM | + ZFS_TYPE_VOLUME)) != NULL) { + if (zhp->zfs_type == ZFS_TYPE_VOLUME) { + ++linktries; + *cp = '@'; + if (zvol_create_link(zhp->zfs_hdl, nvpair_name(snap))) + ++linkfail; + else + ++linkok; + } + } } } - if (ret != 0) - (void) zfs_standard_error(hdl, errno, errbuf); + nvlist_free(props); + nvlist_free(errors); + return (ret); +} - zfs_close(zhp); +int +zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, + nvlist_t *props) +{ + int ret; + snapdata_t sd = { 0 }; + char fsname[ZFS_MAXNAMELEN]; + char *cp; + zfs_handle_t *zhp; + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot snapshot %s"), path); + + if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE)) + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + + (void) strlcpy(fsname, path, sizeof (fsname)); + cp = strchr(fsname, '@'); + *cp = '\0'; + sd.sd_snapname = cp + 1; + if ((zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | + ZFS_TYPE_VOLUME)) == NULL) { + return (-1); + } + + verify(nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) == 0); + if (recursive) { + (void) zfs_snapshot_cb(zfs_handle_dup(zhp), &sd); + } else { + fnvlist_add_boolean(sd.sd_nvl, path); + } + + ret = zfs_snapshot_nvl(hdl, sd.sd_nvl, props); + nvlist_free(sd.sd_nvl); + zfs_close(zhp); return (ret); } @@ -3655,17 +3683,13 @@ rollback_destroy(zfs_handle_t *zhp, void *data) zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT && zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { - char *logstr; cbp->cb_dependent = B_TRUE; cbp->cb_error |= zfs_iter_dependents(zhp, B_FALSE, rollback_destroy, cbp); cbp->cb_dependent = B_FALSE; - logstr = zhp->zfs_hdl->libzfs_log_str; - zhp->zfs_hdl->libzfs_log_str = NULL; cbp->cb_error |= zfs_destroy(zhp, B_FALSE); - zhp->zfs_hdl->libzfs_log_str = logstr; } } else { /* We must destroy this clone; first unmount it */ @@ -3702,7 +3726,7 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force) { rollback_data_t cb = { 0 }; int err; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; boolean_t restore_resv = 0; uint64_t old_volsize = 0, new_volsize; zfs_prop_t resv_prop = { 0 }; @@ -3789,7 +3813,7 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, boolean_t force_unmount) { int ret; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char *delim; prop_changelist_t *cl = NULL; 
zfs_handle_t *zhrp = NULL; @@ -4008,7 +4032,7 @@ zvol_create_link(libzfs_handle_t *hdl, const char *dataset) static int zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char path[MAXPATHLEN]; int error; @@ -4072,7 +4096,7 @@ zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists) int zvol_remove_link(libzfs_handle_t *hdl, const char *dataset) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int timeout = 3000; /* in milliseconds */ int error = 0; int i; @@ -4265,7 +4289,7 @@ static int zfs_smb_acl_mgmt(libzfs_handle_t *hdl, char *dataset, char *path, zfs_smb_acl_op_t cmd, char *resource1, char *resource2) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; nvlist_t *nvlist = NULL; int error; @@ -4347,7 +4371,7 @@ int zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type, zfs_userspace_cb_t func, void *arg) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zfs_useracct_t buf[100]; libzfs_handle_t *hdl = zhp->zfs_hdl; int ret; @@ -4389,7 +4413,7 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, boolean_t recursive, boolean_t temphold, boolean_t enoent_ok, int cleanup_fd, uint64_t dsobj, uint64_t createtxg) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; ASSERT(!recursive || dsobj == 0); @@ -4447,7 +4471,7 @@ int zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, boolean_t recursive) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); @@ -4487,7 +4511,7 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, int zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; int nvsz = 2048; void *nvbuf; @@ -4554,7 +4578,7 @@ zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl) int zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; char *nvbuf; char errbuf[ZFS_MAXNAMELEN+32]; @@ -4608,7 +4632,7 @@ zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) int zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; int nvsz = 2048; void *nvbuf; diff --git a/lib/libzfs/libzfs_diff.c b/lib/libzfs/libzfs_diff.c index 77d5a09ec63c..d8ef6ff025b1 100644 --- a/lib/libzfs/libzfs_diff.c +++ b/lib/libzfs/libzfs_diff.c @@ -90,7 +90,7 @@ static int get_stats_for_obj(differ_info_t *di, const char *dsname, uint64_t obj, char *pn, int maxlen, zfs_stat_t *sb) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int error; (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name)); @@ -379,7 +379,7 @@ describe_free(FILE *fp, differ_info_t *di, 
uint64_t object, char *namebuf, static int write_free_diffs(FILE *fp, differ_info_t *di, dmu_diff_record_t *dr) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *lhdl = di->zhp->zfs_hdl; char fobjname[MAXPATHLEN]; @@ -507,7 +507,7 @@ static int make_temp_snapshot(differ_info_t *di) { libzfs_handle_t *hdl = di->zhp->zfs_hdl; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) snprintf(zc.zc_value, sizeof (zc.zc_value), ZDIFF_PREFIX, getpid()); @@ -749,7 +749,7 @@ int zfs_show_diffs(zfs_handle_t *zhp, int outfd, const char *fromsnap, const char *tosnap, int flags) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char errbuf[1024]; differ_info_t di = { 0 }; pthread_t tid; diff --git a/lib/libzfs/libzfs_fru.c b/lib/libzfs/libzfs_fru.c index 78f2f9c371ae..aa84aa30deb5 100644 --- a/lib/libzfs/libzfs_fru.c +++ b/lib/libzfs/libzfs_fru.c @@ -361,7 +361,7 @@ libzfs_fru_devpath(libzfs_handle_t *hdl, const char *fru) int zpool_fru_set(zpool_handle_t *zhp, uint64_t vdev_guid, const char *fru) { - zfs_cmd_t zc = { 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); (void) strncpy(zc.zc_value, fru, sizeof (zc.zc_value)); diff --git a/lib/libzfs/libzfs_graph.c b/lib/libzfs/libzfs_graph.c index 0e538e3ded84..3c5bdcc6767c 100644 --- a/lib/libzfs/libzfs_graph.c +++ b/lib/libzfs/libzfs_graph.c @@ -379,7 +379,7 @@ zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source, static int iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zfs_vertex_t *zvp; /* @@ -473,7 +473,7 @@ iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) static boolean_t external_dependents(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; /* * Check whether this dataset is a clone or has clones since diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c index 9e79bd934899..af6a43d8388f 100644 --- a/lib/libzfs/libzfs_import.c +++ b/lib/libzfs/libzfs_import.c @@ -365,7 +365,7 @@ static nvlist_t * refresh_config(libzfs_handle_t *hdl, nvlist_t *config) { nvlist_t *nvl; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int err; if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) diff --git a/lib/libzfs/libzfs_iter.c b/lib/libzfs/libzfs_iter.c index 8215d3cb17af..ff76f9f367e2 100644 --- a/lib/libzfs/libzfs_iter.c +++ b/lib/libzfs/libzfs_iter.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2010 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #include @@ -103,7 +103,7 @@ zfs_do_list_ioctl(zfs_handle_t *zhp, int arg, zfs_cmd_t *zc) int zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zfs_handle_t *nzhp; int ret; @@ -140,7 +140,7 @@ int zfs_iter_snapshots(zfs_handle_t *zhp, boolean_t simple, zfs_iter_f func, void *data) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zfs_handle_t *nzhp; int ret; @@ -306,12 +306,11 @@ int zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig, zfs_iter_f func, void *arg) { - char buf[ZFS_MAXNAMELEN]; - char *comma_separated, *cp; + char *buf, *comma_separated, *cp; int err = 0; int ret = 0; - (void) strlcpy(buf, spec_orig, sizeof (buf)); + buf = zfs_strdup(fs_zhp->zfs_hdl, spec_orig); cp = buf; while ((comma_separated = strsep(&cp, ",")) != NULL) { @@ -369,6 +368,7 @@ zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig, } } + free(buf); return (ret); } diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index a6cacd370c2c..45c39cc0facd 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -63,7 +64,7 @@ typedef struct prop_flags { static int zpool_get_all_props(zpool_handle_t *zhp) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); @@ -691,7 +692,7 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, int zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int ret = -1; char errbuf[1024]; nvlist_t *nvl = NULL; @@ -1140,7 +1141,7 @@ int zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, nvlist_t *props, nvlist_t *fsprops) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; nvlist_t *zc_fsprops = NULL; nvlist_t *zc_props = NULL; char msg[1024]; @@ -1272,9 +1273,9 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, * datasets left in the pool. */ int -zpool_destroy(zpool_handle_t *zhp) +zpool_destroy(zpool_handle_t *zhp, const char *log_str) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zfs_handle_t *zfp = NULL; libzfs_handle_t *hdl = zhp->zpool_hdl; char msg[1024]; @@ -1284,6 +1285,7 @@ zpool_destroy(zpool_handle_t *zhp) return (-1); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + zc.zc_history = (uint64_t)(uintptr_t)log_str; if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, @@ -1317,7 +1319,7 @@ zpool_destroy(zpool_handle_t *zhp) int zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int ret; libzfs_handle_t *hdl = zhp->zpool_hdl; char msg[1024]; @@ -1440,10 +1442,11 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) * Exports the pool from the system. The caller must ensure that there are no * mounted datasets in the pool. 
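With command logging no longer staged on the libzfs handle, zpool_destroy() above and the export functions that follow take the history string directly from the caller and hand it to the kernel through zc_history. A minimal sketch of a caller using the new signatures; the helper function and its arguments are illustrative:

    #include <libzfs.h>

    /* Tear down a pool, recording the invoking command line in the pool history. */
    static int
    teardown_pool(zpool_handle_t *zhp, boolean_t export_only, const char *history_str)
    {
            if (export_only)
                    return (zpool_export(zhp, B_FALSE, history_str));
            return (zpool_destroy(zhp, history_str));
    }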
*/ -int -zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce) +static int +zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce, + const char *log_str) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, @@ -1452,6 +1455,7 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce) (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_cookie = force; zc.zc_guid = hardforce; + zc.zc_history = (uint64_t)(uintptr_t)log_str; if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) { switch (errno) { @@ -1473,15 +1477,15 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce) } int -zpool_export(zpool_handle_t *zhp, boolean_t force) +zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str) { - return (zpool_export_common(zhp, force, B_FALSE)); + return (zpool_export_common(zhp, force, B_FALSE, log_str)); } int -zpool_export_force(zpool_handle_t *zhp) +zpool_export_force(zpool_handle_t *zhp, const char *log_str) { - return (zpool_export_common(zhp, B_TRUE, B_TRUE)); + return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str)); } static void @@ -1717,7 +1721,7 @@ int zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, nvlist_t *props, int flags) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zpool_rewind_policy_t policy; nvlist_t *nv = NULL; nvlist_t *nvinfo = NULL; @@ -1909,7 +1913,7 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, int zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; @@ -2385,7 +2389,7 @@ int zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, vdev_state_t *newstate) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache, islog; @@ -2469,7 +2473,7 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, int zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache; @@ -2519,7 +2523,7 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp) int zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; @@ -2554,7 +2558,7 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) int zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; @@ -2608,7 +2612,7 @@ int zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; 
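A note on the initializer that recurs throughout these hunks: writing zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; explicitly initializes the leading members, and C zero-initializes every remaining member of a partially initialized aggregate, so the net effect is a fully zeroed structure, presumably spelled out this way to keep compilers from warning about the shorter initializer forms it replaces. Each such declaration behaves like the following two lines (shown only as an equivalence, not as proposed code):

    zfs_cmd_t zc;
    (void) memset(&zc, 0, sizeof (zc));	/* same effect as the bracketed initializer */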
char msg[1024]; int ret; nvlist_t *tgt; @@ -2784,7 +2788,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, int zpool_vdev_detach(zpool_handle_t *zhp, const char *path) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache; @@ -2882,7 +2886,7 @@ int zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot, nvlist_t *props, splitflags_t flags) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL; nvlist_t **varray = NULL, *zc_props = NULL; @@ -3093,7 +3097,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot, int zpool_vdev_remove(zpool_handle_t *zhp, const char *path) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache, islog; @@ -3138,7 +3142,7 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path) int zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; nvlist_t *tgt; zpool_rewind_policy_t policy; @@ -3214,7 +3218,7 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl) int zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; @@ -3240,7 +3244,7 @@ zpool_reguid(zpool_handle_t *zhp) { char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name); @@ -3258,7 +3262,7 @@ zpool_reguid(zpool_handle_t *zhp) int zpool_reopen(zpool_handle_t *zhp) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; @@ -3338,7 +3342,7 @@ path_to_devid(const char *path) static void set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value)); @@ -3513,7 +3517,7 @@ zbookmark_compare(const void *a, const void *b) int zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; uint64_t count; zbookmark_t *zb = NULL; int i; @@ -3609,7 +3613,7 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) int zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strcpy(zc.zc_name, zhp->zpool_name); @@ -3623,40 +3627,32 @@ zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version) } void -zpool_set_history_str(const char *subcommand, int argc, char **argv, - char *history_str) +zfs_save_arguments(int argc, char **argv, char *string, int len) { int i; - (void) 
strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN); + (void) strlcpy(string, basename(argv[0]), len); for (i = 1; i < argc; i++) { - if (strlen(history_str) + 1 + strlen(argv[i]) > - HIS_MAX_RECORD_LEN) - break; - (void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN); - (void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN); + (void) strlcat(string, " ", len); + (void) strlcat(string, argv[i], len); } } -/* - * Stage command history for logging. - */ int -zpool_stage_history(libzfs_handle_t *hdl, const char *history_str) +zpool_log_history(libzfs_handle_t *hdl, const char *message) { - if (history_str == NULL) - return (EINVAL); - - if (strlen(history_str) > HIS_MAX_RECORD_LEN) - return (EINVAL); - - if (hdl->libzfs_log_str != NULL) - free(hdl->libzfs_log_str); - - if ((hdl->libzfs_log_str = strdup(history_str)) == NULL) - return (no_memory(hdl)); - - return (0); + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + nvlist_t *args; + int err; + + args = fnvlist_alloc(); + fnvlist_add_string(args, "message", message); + err = zcmd_write_src_nvlist(hdl, &zc, args); + if (err == 0) + err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc); + nvlist_free(args); + zcmd_free_nvlists(&zc); + return (err); } /* @@ -3671,7 +3667,7 @@ zpool_stage_history(libzfs_handle_t *hdl, const char *history_str) static int get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); @@ -3808,7 +3804,7 @@ int zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp, int *dropped, int block, int cleanup_fd) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int error = 0; *nvp = NULL; @@ -3867,7 +3863,7 @@ zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp, int zpool_events_clear(libzfs_handle_t *hdl, int *count) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, @@ -3886,7 +3882,7 @@ void zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, char *pathname, size_t len) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; boolean_t mounted = B_FALSE; char *mntpnt = NULL; char dsname[MAXNAMELEN]; diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 9dbfb1641ac6..5d0ab0eb4b71 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -812,7 +812,7 @@ static int estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj, boolean_t fromorigin, uint64_t *sizep) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); @@ -876,7 +876,7 @@ static int dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj, boolean_t fromorigin, int outfd, nvlist_t *debugnv) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *thisdbg; @@ -992,7 +992,7 @@ send_progress_thread(void *arg) { progress_arg_t *pa = arg; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; 
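Returning to the libzfs_pool.c hunk above: rather than staging a history string on the libzfs handle, a command now captures its argument vector once with zfs_save_arguments() and, when the operation has succeeded, records it explicitly with zpool_log_history(), which wraps the new ZFS_IOC_LOG_HISTORY ioctl. A sketch of the expected calling pattern; the buffer size and the placement of the logging call are illustrative:

    #include <libzfs.h>

    int
    main(int argc, char **argv)
    {
            libzfs_handle_t *hdl;
            char history_str[4096];		/* illustrative size */

            if ((hdl = libzfs_init()) == NULL)
                    return (1);
            zfs_save_arguments(argc, argv, history_str, sizeof (history_str));

            /* ... run the requested subcommand here ... */

            (void) zpool_log_history(hdl, history_str);
            libzfs_fini(hdl);
            return (0);
    }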
zfs_handle_t *zhp = pa->pa_zhp; libzfs_handle_t *hdl = zhp->zfs_hdl; unsigned long long bytes; @@ -1195,7 +1195,7 @@ dump_filesystem(zfs_handle_t *zhp, void *arg) int rv = 0; send_dump_data_t *sdd = arg; boolean_t missingfrom = B_FALSE; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", zhp->zfs_name, sdd->tosnap); @@ -1683,7 +1683,7 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, int baselen, char *newname, recvflags_t *flags) { static int seq; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int err; prop_changelist_t *clp; zfs_handle_t *zhp; @@ -1756,7 +1756,7 @@ static int recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen, char *newname, recvflags_t *flags) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int err = 0; prop_changelist_t *clp; zfs_handle_t *zhp; @@ -2015,7 +2015,7 @@ recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, stream_originguid, originguid)) { case 1: { /* promote it! */ - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; nvlist_t *origin_nvfs; char *origin_fsname; @@ -2087,7 +2087,7 @@ recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops", &props) && 0 == nvlist_lookup_nvlist(props, stream_snapname, &props)) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zc.zc_cookie = B_TRUE; /* received */ (void) snprintf(zc.zc_name, sizeof (zc.zc_name), @@ -2518,7 +2518,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; time_t begin_time; int ioctl_err, ioctl_errno, err; char *cp; @@ -2892,7 +2892,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, zcmd_free_nvlists(&zc); if (err == 0 && snapprops_nvlist) { - zfs_cmd_t zc2 = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc2 = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) strcpy(zc2.zc_name, zc.zc_value); zc2.zc_cookie = B_TRUE; /* received */ diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 5bb88e94677b..54dc2afc5b07 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -45,6 +45,7 @@ #include #include +#include #include "libzfs_impl.h" #include "zfs_prop.h" @@ -712,6 +713,14 @@ libzfs_init(void) hdl->libzfs_sharetab = fopen("/etc/dfs/sharetab", "r"); + if (libzfs_core_init() != 0) { + (void) close(hdl->libzfs_fd); + (void) fclose(hdl->libzfs_mnttab); + (void) fclose(hdl->libzfs_sharetab); + free(hdl); + return (NULL); + } + zfs_prop_init(); zpool_prop_init(); zpool_feature_init(); @@ -733,12 +742,11 @@ libzfs_fini(libzfs_handle_t *hdl) if (hdl->libzfs_sharetab) (void) fclose(hdl->libzfs_sharetab); zfs_uninit_libshare(hdl); - if (hdl->libzfs_log_str) - (void) free(hdl->libzfs_log_str); zpool_free_handles(hdl); libzfs_fru_clear(hdl, B_TRUE); namespace_clear(hdl); libzfs_mnttab_fini(hdl); + libzfs_core_fini(); free(hdl); } @@ -1061,17 +1069,7 @@ zcmd_read_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t **nvlp) int 
zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc) { - int error; - - zc->zc_history = (uint64_t)(uintptr_t)hdl->libzfs_log_str; - error = ioctl(hdl->libzfs_fd, request, zc); - if (hdl->libzfs_log_str) { - free(hdl->libzfs_log_str); - hdl->libzfs_log_str = NULL; - } - zc->zc_history = 0; - - return (error); + return (ioctl(hdl->libzfs_fd, request, zc)); } /* diff --git a/lib/libzfs_core/Makefile.am b/lib/libzfs_core/Makefile.am new file mode 100644 index 000000000000..180292de1407 --- /dev/null +++ b/lib/libzfs_core/Makefile.am @@ -0,0 +1,15 @@ +include $(top_srcdir)/config/Rules.am + +DEFAULT_INCLUDES += \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/lib/libspl/include + +lib_LTLIBRARIES = libzfs_core.la + +libzfs_core_la_SOURCES = \ + $(top_srcdir)/lib/libzfs_core/libzfs_core.c + +libzfs_core_la_LIBADD = \ + $(top_builddir)/lib/libnvpair/libnvpair.la + +libzfs_core_la_LDFLAGS = -pthread -version-info 1:1:0 diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c new file mode 100644 index 000000000000..bff6902caaf9 --- /dev/null +++ b/lib/libzfs_core/libzfs_core.c @@ -0,0 +1,477 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * LibZFS_Core (lzc) is intended to replace most functionality in libzfs. + * It has the following characteristics: + * + * - Thread Safe. libzfs_core is accessible concurrently from multiple + * threads. This is accomplished primarily by avoiding global data + * (e.g. caching). Since it's thread-safe, there is no reason for a + * process to have multiple libzfs "instances". Therefore, we store + * our few pieces of data (e.g. the file descriptor) in global + * variables. The fd is reference-counted so that the libzfs_core + * library can be "initialized" multiple times (e.g. by different + * consumers within the same process). + * + * - Committed Interface. The libzfs_core interface will be committed, + * therefore consumers can compile against it and be confident that + * their code will continue to work on future releases of this code. + * Currently, the interface is Evolving (not Committed), but we intend + * to commit to it once it is more complete and we determine that it + * meets the needs of all consumers. + * + * - Programatic Error Handling. libzfs_core communicates errors with + * defined error numbers, and doesn't print anything to stdout/stderr. + * + * - Thin Layer. libzfs_core is a thin layer, marshaling arguments + * to/from the kernel ioctls. There is generally a 1:1 correspondence + * between libzfs_core functions and ioctls to /dev/zfs. + * + * - Clear Atomicity. 
Because libzfs_core functions are generally 1:1 + * with kernel ioctls, and kernel ioctls are general atomic, each + * libzfs_core function is atomic. For example, creating multiple + * snapshots with a single call to lzc_snapshot() is atomic -- it + * can't fail with only some of the requested snapshots created, even + * in the event of power loss or system crash. + * + * - Continued libzfs Support. Some higher-level operations (e.g. + * support for "zfs send -R") are too complicated to fit the scope of + * libzfs_core. This functionality will continue to live in libzfs. + * Where appropriate, libzfs will use the underlying atomic operations + * of libzfs_core. For example, libzfs may implement "zfs send -R | + * zfs receive" by using individual "send one snapshot", rename, + * destroy, and "receive one snapshot" operations in libzfs_core. + * /sbin/zfs and /zbin/zpool will link with both libzfs and + * libzfs_core. Other consumers should aim to use only libzfs_core, + * since that will be the supported, stable interface going forwards. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int g_fd; +static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; +static int g_refcount; + +int +libzfs_core_init(void) +{ + (void) pthread_mutex_lock(&g_lock); + if (g_refcount == 0) { + g_fd = open("/dev/zfs", O_RDWR); + if (g_fd < 0) { + (void) pthread_mutex_unlock(&g_lock); + return (errno); + } + } + g_refcount++; + (void) pthread_mutex_unlock(&g_lock); + return (0); +} + +void +libzfs_core_fini(void) +{ + (void) pthread_mutex_lock(&g_lock); + ASSERT3S(g_refcount, >, 0); + g_refcount--; + if (g_refcount == 0) + (void) close(g_fd); + (void) pthread_mutex_unlock(&g_lock); +} + +static int +lzc_ioctl(zfs_ioc_t ioc, const char *name, + nvlist_t *source, nvlist_t **resultp) +{ + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + int error = 0; + char *packed; + size_t size; + + ASSERT3S(g_refcount, >, 0); + + (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + + packed = fnvlist_pack(source, &size); + zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; + zc.zc_nvlist_src_size = size; + + if (resultp != NULL) { + zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024); + zc.zc_nvlist_dst = (uint64_t)(uintptr_t) + malloc(zc.zc_nvlist_dst_size); + if (zc.zc_nvlist_dst == (uint64_t)0) { + error = ENOMEM; + goto out; + } + } + + while (ioctl(g_fd, ioc, &zc) != 0) { + if (errno == ENOMEM && resultp != NULL) { + free((void *)(uintptr_t)zc.zc_nvlist_dst); + zc.zc_nvlist_dst_size *= 2; + zc.zc_nvlist_dst = (uint64_t)(uintptr_t) + malloc(zc.zc_nvlist_dst_size); + if (zc.zc_nvlist_dst == (uint64_t)0) { + error = ENOMEM; + goto out; + } + } else { + error = errno; + break; + } + } + if (zc.zc_nvlist_dst_filled) { + *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, + zc.zc_nvlist_dst_size); + } else if (resultp != NULL) { + *resultp = NULL; + } + +out: + fnvlist_pack_free(packed, size); + free((void *)(uintptr_t)zc.zc_nvlist_dst); + return (error); +} + +int +lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props) +{ + int error; + nvlist_t *args = fnvlist_alloc(); + fnvlist_add_int32(args, "type", type); + if (props != NULL) + fnvlist_add_nvlist(args, "props", props); + error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); + nvlist_free(args); + return (error); +} + +int +lzc_clone(const char *fsname, const char *origin, + nvlist_t *props) +{ + int error; + nvlist_t *args = 
fnvlist_alloc(); + fnvlist_add_string(args, "origin", origin); + if (props != NULL) + fnvlist_add_nvlist(args, "props", props); + error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); + nvlist_free(args); + return (error); +} + +/* + * Creates snapshots. + * + * The keys in the snaps nvlist are the snapshots to be created. + * They must all be in the same pool. + * + * The props nvlist is properties to set. Currently only user properties + * are supported. { user:prop_name -> string value } + * + * The returned results nvlist will have an entry for each snapshot that failed. + * The value will be the (int32) error code. + * + * The return value will be 0 if all snapshots were created, otherwise it will + * be the errno of a (undetermined) snapshot that failed. + */ +int +lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) +{ + nvpair_t *elem; + nvlist_t *args; + int error; + char pool[MAXNAMELEN]; + + *errlist = NULL; + + /* determine the pool name */ + elem = nvlist_next_nvpair(snaps, NULL); + if (elem == NULL) + return (0); + (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); + pool[strcspn(pool, "/@")] = '\0'; + + args = fnvlist_alloc(); + fnvlist_add_nvlist(args, "snaps", snaps); + if (props != NULL) + fnvlist_add_nvlist(args, "props", props); + + error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist); + nvlist_free(args); + + return (error); +} + +/* + * Destroys snapshots. + * + * The keys in the snaps nvlist are the snapshots to be destroyed. + * They must all be in the same pool. + * + * Snapshots that do not exist will be silently ignored. + * + * If 'defer' is not set, and a snapshot has user holds or clones, the + * destroy operation will fail and none of the snapshots will be + * destroyed. + * + * If 'defer' is set, and a snapshot has user holds or clones, it will be + * marked for deferred destruction, and will be destroyed when the last hold + * or clone is removed/destroyed. + * + * The return value will be 0 if all snapshots were destroyed (or marked for + * later destruction if 'defer' is set) or didn't exist to begin with. + * + * Otherwise the return value will be the errno of a (undetermined) snapshot + * that failed, no snapshots will be destroyed, and the errlist will have an + * entry for each snapshot that failed. The value in the errlist will be + * the (int32) error code. 
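A corresponding consumer-side sketch for lzc_destroy_snaps(), exercising the defer behavior described above; the snapshot names are illustrative and libzfs_core_init() is assumed to have been called:

    #include <libnvpair.h>
    #include <libzfs_core.h>

    /* Mark two snapshots for deferred destruction. */
    static int
    defer_destroy_pair(void)
    {
            nvlist_t *snaps = fnvlist_alloc();
            nvlist_t *errlist = NULL;
            int err;

            fnvlist_add_boolean(snaps, "tank/home@old");
            fnvlist_add_boolean(snaps, "tank/data@old");
            err = lzc_destroy_snaps(snaps, B_TRUE, &errlist);
            nvlist_free(errlist);
            fnvlist_free(snaps);
            return (err);
    }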
+ */ +int +lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) +{ + nvpair_t *elem; + nvlist_t *args; + int error; + char pool[MAXNAMELEN]; + + /* determine the pool name */ + elem = nvlist_next_nvpair(snaps, NULL); + if (elem == NULL) + return (0); + (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); + pool[strcspn(pool, "/@")] = '\0'; + + args = fnvlist_alloc(); + fnvlist_add_nvlist(args, "snaps", snaps); + if (defer) + fnvlist_add_boolean(args, "defer"); + + error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist); + nvlist_free(args); + + return (error); + +} + +int +lzc_snaprange_space(const char *firstsnap, const char *lastsnap, + uint64_t *usedp) +{ + nvlist_t *args; + nvlist_t *result; + int err; + char fs[MAXNAMELEN]; + char *atp; + + /* determine the fs name */ + (void) strlcpy(fs, firstsnap, sizeof (fs)); + atp = strchr(fs, '@'); + if (atp == NULL) + return (EINVAL); + *atp = '\0'; + + args = fnvlist_alloc(); + fnvlist_add_string(args, "firstsnap", firstsnap); + + err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result); + nvlist_free(args); + if (err == 0) + *usedp = fnvlist_lookup_uint64(result, "used"); + fnvlist_free(result); + + return (err); +} + +boolean_t +lzc_exists(const char *dataset) +{ + /* + * The objset_stats ioctl is still legacy, so we need to construct our + * own zfs_cmd_t rather than using zfsc_ioctl(). + */ + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + + (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); + return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0); +} + +/* + * If fromsnap is NULL, a full (non-incremental) stream will be sent. + */ +int +lzc_send(const char *snapname, const char *fromsnap, int fd) +{ + nvlist_t *args; + int err; + + args = fnvlist_alloc(); + fnvlist_add_int32(args, "fd", fd); + if (fromsnap != NULL) + fnvlist_add_string(args, "fromsnap", fromsnap); + err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); + nvlist_free(args); + return (err); +} + +/* + * If fromsnap is NULL, a full (non-incremental) stream will be estimated. + */ +int +lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep) +{ + nvlist_t *args; + nvlist_t *result; + int err; + + args = fnvlist_alloc(); + if (fromsnap != NULL) + fnvlist_add_string(args, "fromsnap", fromsnap); + err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); + nvlist_free(args); + if (err == 0) + *spacep = fnvlist_lookup_uint64(result, "space"); + nvlist_free(result); + return (err); +} + +static int +recv_read(int fd, void *buf, int ilen) +{ + char *cp = buf; + int rv; + int len = ilen; + + do { + rv = read(fd, cp, len); + cp += rv; + len -= rv; + } while (rv > 0); + + if (rv < 0 || len != 0) + return (EIO); + + return (0); +} + +/* + * The simplest receive case: receive from the specified fd, creating the + * specified snapshot. Apply the specified properties a "received" properties + * (which can be overridden by locally-set properties). If the stream is a + * clone, its origin snapshot must be specified by 'origin'. The 'force' + * flag will cause the target filesystem to be rolled back or destroyed if + * necessary to receive. + * + * Return 0 on success or an errno on failure. + * + * Note: this interface does not work on dedup'd streams + * (those with DMU_BACKUP_FEATURE_DEDUP). 
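The send and receive entry points above pair naturally: lzc_send() writes the stream for one snapshot to a file descriptor, and lzc_receive() creates a snapshot from a stream read from a file descriptor, normally on the other end of a pipe or socket in a separate process. Two minimal wrappers, shown only to illustrate the call signatures; the helper names are not part of the patch:

    #include <libzfs_core.h>

    /* Sender side: write a full (non-incremental) stream for 'snap' to fd. */
    static int
    send_full(const char *snap, int fd)
    {
            return (lzc_send(snap, NULL, fd));
    }

    /*
     * Receiver side: create 'snap' from the stream on fd, rolling back or
     * destroying the target if necessary.
     */
    static int
    recv_full(const char *snap, int fd)
    {
            return (lzc_receive(snap, NULL, NULL, B_TRUE, fd));
    }

lzc_send_space() can be used beforehand to estimate the stream size, with the same snapname/fromsnap convention as lzc_send().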
+ */ +int +lzc_receive(const char *snapname, nvlist_t *props, const char *origin, + boolean_t force, int fd) +{ + /* + * The receive ioctl is still legacy, so we need to construct our own + * zfs_cmd_t rather than using zfsc_ioctl(). + */ + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + char *atp; + char *packed = NULL; + size_t size; + dmu_replay_record_t drr; + int error; + + ASSERT3S(g_refcount, >, 0); + + /* zc_name is name of containing filesystem */ + (void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name)); + atp = strchr(zc.zc_name, '@'); + if (atp == NULL) + return (EINVAL); + *atp = '\0'; + + /* if the fs does not exist, try its parent. */ + if (!lzc_exists(zc.zc_name)) { + char *slashp = strrchr(zc.zc_name, '/'); + if (slashp == NULL) + return (ENOENT); + *slashp = '\0'; + + } + + /* zc_value is full name of the snapshot to create */ + (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); + + if (props != NULL) { + /* zc_nvlist_src is props to set */ + packed = fnvlist_pack(props, &size); + zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; + zc.zc_nvlist_src_size = size; + } + + /* zc_string is name of clone origin (if DRR_FLAG_CLONE) */ + if (origin != NULL) + (void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string)); + + /* zc_begin_record is non-byteswapped BEGIN record */ + error = recv_read(fd, &drr, sizeof (drr)); + if (error != 0) + goto out; + zc.zc_begin_record = drr.drr_u.drr_begin; + + /* zc_cookie is fd to read from */ + zc.zc_cookie = fd; + + /* zc guid is force flag */ + zc.zc_guid = force; + + /* zc_cleanup_fd is unused */ + zc.zc_cleanup_fd = -1; + + error = ioctl(g_fd, ZFS_IOC_RECV, &zc); + if (error != 0) + error = errno; + +out: + if (packed != NULL) + fnvlist_pack_free(packed, size); + free((void*)(uintptr_t)zc.zc_nvlist_dst); + return (error); +} diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index f7aeeb440c06..0293b5eb5a2d 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -1082,6 +1082,12 @@ crgetuid(cred_t *cr) return (0); } +uid_t +crgetruid(cred_t *cr) +{ + return (0); +} + gid_t crgetgid(cred_t *cr) { diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index a0089e699c21..bd8bbfdcf69a 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -58,8 +58,8 @@ zfs \- configures ZFS file systems .LP .nf -\fBzfs\fR \fBsnapshot | snap\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR]... - \fIfilesystem@snapname\fR|\fIvolume@snapname\fR +\fBzfs\fR \fBsnapshot | snap\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... + \fIfilesystem@snapname\fR|\fIvolume@snapname\fR ... .fi .LP @@ -1558,11 +1558,11 @@ behavior for mounted file systems in use. .ne 2 .mk .na -\fB\fBzfs snapshot\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fIfilesystem@snapname\fR|\fIvolume@snapname\fR\fR +\fB\fBzfs snapshot\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fIfilesystem@snapname\fR|\fIvolume@snapname\fR\fR ... .ad .sp .6 .RS 4n -Creates a snapshot with the given name. All previous modifications by successful system calls to the file system are part of the snapshot. See the "Snapshots" section for details. +Creates snapshots with the given names. All previous modifications by successful system calls to the file system are part of the snapshots. Snapshots are taken atomically, so that all snapshots correspond to the same moment in time. See the "Snapshots" section for details. .sp .ne 2 .mk @@ -1571,7 +1571,7 @@ Creates a snapshot with the given name. 
All previous modifications by successful .ad .sp .6 .RS 4n -Recursively create snapshots of all descendent datasets. Snapshots are taken atomically, so that all recursive snapshots correspond to the same moment in time. +Recursively create snapshots of all descendent datasets. .RE .sp diff --git a/module/nvpair/fnvpair.c b/module/nvpair/fnvpair.c index 17583715bb53..7faea0fceb9a 100644 --- a/module/nvpair/fnvpair.c +++ b/module/nvpair/fnvpair.c @@ -501,6 +501,7 @@ EXPORT_SYMBOL(fnvlist_alloc); EXPORT_SYMBOL(fnvlist_free); EXPORT_SYMBOL(fnvlist_size); EXPORT_SYMBOL(fnvlist_pack); +EXPORT_SYMBOL(fnvlist_pack_free); EXPORT_SYMBOL(fnvlist_unpack); EXPORT_SYMBOL(fnvlist_dup); EXPORT_SYMBOL(fnvlist_merge); diff --git a/module/zcommon/zfs_comutil.c b/module/zcommon/zfs_comutil.c index ccf169be6dae..6d0314fa78d0 100644 --- a/module/zcommon/zfs_comutil.c +++ b/module/zcommon/zfs_comutil.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* @@ -157,7 +158,11 @@ zfs_spa_version_map(int zpl_version) return (version); } -const char *zfs_history_event_names[LOG_END] = { +/* + * This is the table of legacy internal event names; it should not be modified. + * The internal events are now stored in the history log as strings. + */ +const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = { "invalid event", "pool create", "vdev add", diff --git a/module/zcommon/zprop_common.c b/module/zcommon/zprop_common.c index 0a0af2334a08..6d9f89a989dd 100644 --- a/module/zcommon/zprop_common.c +++ b/module/zcommon/zprop_common.c @@ -22,6 +22,9 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ /* * Common routines used by zfs and zpool property management. @@ -129,7 +132,8 @@ zprop_register_hidden(int prop, const char *name, zprop_type_t type, zprop_attr_t attr, int objset_types, const char *colname) { zprop_register_impl(prop, name, type, 0, NULL, attr, - objset_types, NULL, colname, B_FALSE, B_FALSE, NULL); + objset_types, NULL, colname, + type == PROP_TYPE_NUMBER, B_FALSE, NULL); } diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 52d55d566895..0f07a4cc95d2 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ /* Portions Copyright 2010 Robert Milkowski */ @@ -525,9 +526,10 @@ dmu_objset_evict_dbufs(objset_t *os) void dmu_objset_evict(objset_t *os) { - dsl_dataset_t *ds = os->os_dsl_dataset; int t; + dsl_dataset_t *ds = os->os_dsl_dataset; + for (t = 0; t < TXG_SIZE; t++) ASSERT(!dmu_objset_is_dirty(os, t)); @@ -698,30 +700,33 @@ dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx) spa_t *spa = dd->dd_pool->dp_spa; struct oscarg *oa = arg2; uint64_t obj; + dsl_dataset_t *ds; + blkptr_t *bp; ASSERT(dmu_tx_is_syncing(tx)); obj = dsl_dataset_create_sync(dd, oa->lastname, oa->clone_origin, oa->flags, oa->cr, tx); - if (oa->clone_origin == NULL) { - dsl_pool_t *dp = dd->dd_pool; - dsl_dataset_t *ds; - blkptr_t *bp; - objset_t *os; - - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); - bp = dsl_dataset_get_blkptr(ds); - ASSERT(BP_IS_HOLE(bp)); - - os = dmu_objset_create_impl(spa, ds, bp, oa->type, tx); + VERIFY3U(0, ==, dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds)); + bp = dsl_dataset_get_blkptr(ds); + if (BP_IS_HOLE(bp)) { + objset_t *os = + dmu_objset_create_impl(spa, ds, bp, oa->type, tx); if (oa->userfunc) oa->userfunc(os, oa->userarg, oa->cr, tx); - dsl_dataset_rele(ds, FTAG); } - spa_history_log_internal(LOG_DS_CREATE, spa, tx, "dataset = %llu", obj); + if (oa->clone_origin == NULL) { + spa_history_log_internal_ds(ds, "create", tx, ""); + } else { + char namebuf[MAXNAMELEN]; + dsl_dataset_name(oa->clone_origin, namebuf); + spa_history_log_internal_ds(ds, "clone", tx, + "origin=%s (%llu)", namebuf, oa->clone_origin->ds_object); + } + dsl_dataset_rele(ds, FTAG); } int @@ -798,34 +803,40 @@ dmu_objset_destroy(const char *name, boolean_t defer) return (error); } -struct snaparg { - dsl_sync_task_group_t *dstg; - char *snapname; - char *htag; - char failed[MAXPATHLEN]; - boolean_t recursive; - boolean_t needsuspend; - boolean_t temporary; - nvlist_t *props; - struct dsl_ds_holdarg *ha; /* only needed in the temporary case */ - dsl_dataset_t *newds; -}; +typedef struct snapallarg { + dsl_sync_task_group_t *saa_dstg; + boolean_t saa_needsuspend; + nvlist_t *saa_props; + + /* the following are used only if 'temporary' is set: */ + boolean_t saa_temporary; + const char *saa_htag; + struct dsl_ds_holdarg *saa_ha; + dsl_dataset_t *saa_newds; +} snapallarg_t; + +typedef struct snaponearg { + const char *soa_longname; /* long snap name */ + const char *soa_snapname; /* short snap name */ + snapallarg_t *soa_saa; +} snaponearg_t; static int snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) { objset_t *os = arg1; - struct snaparg *sn = arg2; + snaponearg_t *soa = arg2; + snapallarg_t *saa = soa->soa_saa; int error; /* The props have already been checked by zfs_check_userprops(). */ error = dsl_dataset_snapshot_check(os->os_dsl_dataset, - sn->snapname, tx); + soa->soa_snapname, tx); if (error) return (error); - if (sn->temporary) { + if (saa->saa_temporary) { /* * Ideally we would just call * dsl_dataset_user_hold_check() and @@ -843,12 +854,13 @@ snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) * Not checking number of tags because the tag will be * unique, as it will be the only tag. 
*/ - if (strlen(sn->htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) + if (strlen(saa->saa_htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) return (E2BIG); - sn->ha = kmem_alloc(sizeof(struct dsl_ds_holdarg), KM_PUSHPAGE); - sn->ha->temphold = B_TRUE; - sn->ha->htag = sn->htag; + saa->saa_ha = kmem_alloc(sizeof (struct dsl_ds_holdarg), + KM_PUSHPAGE); + saa->saa_ha->temphold = B_TRUE; + saa->saa_ha->htag = saa->saa_htag; } return (error); } @@ -858,24 +870,25 @@ snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) { objset_t *os = arg1; dsl_dataset_t *ds = os->os_dsl_dataset; - struct snaparg *sn = arg2; + snaponearg_t *soa = arg2; + snapallarg_t *saa = soa->soa_saa; - dsl_dataset_snapshot_sync(ds, sn->snapname, tx); + dsl_dataset_snapshot_sync(ds, soa->soa_snapname, tx); - if (sn->props) { + if (saa->saa_props != NULL) { dsl_props_arg_t pa; - pa.pa_props = sn->props; + pa.pa_props = saa->saa_props; pa.pa_source = ZPROP_SRC_LOCAL; dsl_props_set_sync(ds->ds_prev, &pa, tx); } - if (sn->temporary) { + if (saa->saa_temporary) { struct dsl_ds_destroyarg da; - dsl_dataset_user_hold_sync(ds->ds_prev, sn->ha, tx); - kmem_free(sn->ha, sizeof (struct dsl_ds_holdarg)); - sn->ha = NULL; - sn->newds = ds->ds_prev; + dsl_dataset_user_hold_sync(ds->ds_prev, saa->saa_ha, tx); + kmem_free(saa->saa_ha, sizeof (struct dsl_ds_holdarg)); + saa->saa_ha = NULL; + saa->saa_newds = ds->ds_prev; da.ds = ds->ds_prev; da.defer = B_TRUE; @@ -884,135 +897,182 @@ snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) } static int -dmu_objset_snapshot_one(const char *name, void *arg) +snapshot_one_impl(const char *snapname, void *arg) { - struct snaparg *sn = arg; + char *fsname; + snapallarg_t *saa = arg; + snaponearg_t *soa; objset_t *os; int err; - char *cp; - /* - * If the objset starts with a '%', then ignore it unless it was - * explicitly named (ie, not recursive). These hidden datasets - * are always inconsistent, and by not opening them here, we can - * avoid a race with dsl_dir_destroy_check(). - */ - cp = strrchr(name, '/'); - if (cp && cp[1] == '%' && sn->recursive) - return (0); + fsname = kmem_zalloc(MAXPATHLEN, KM_PUSHPAGE); + (void) strlcpy(fsname, snapname, MAXPATHLEN); + strchr(fsname, '@')[0] = '\0'; - (void) strcpy(sn->failed, name); - - /* - * Check permissions if we are doing a recursive snapshot. The - * permission checks for the starting dataset have already been - * performed in zfs_secpolicy_snapshot() - */ - if (sn->recursive && (err = zfs_secpolicy_snapshot_perms(name, CRED()))) - return (err); - - err = dmu_objset_hold(name, sn, &os); + err = dmu_objset_hold(fsname, saa, &os); + kmem_free(fsname, MAXPATHLEN); if (err != 0) return (err); /* * If the objset is in an inconsistent state (eg, in the process - * of being destroyed), don't snapshot it. As with %hidden - * datasets, we return EBUSY if this name was explicitly - * requested (ie, not recursive), and otherwise ignore it. + * of being destroyed), don't snapshot it. */ if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { - dmu_objset_rele(os, sn); - return (sn->recursive ? 
0 : EBUSY); + dmu_objset_rele(os, saa); + return (EBUSY); } - if (sn->needsuspend) { + if (saa->saa_needsuspend) { err = zil_suspend(dmu_objset_zil(os)); if (err) { - dmu_objset_rele(os, sn); + dmu_objset_rele(os, saa); return (err); } } - dsl_sync_task_create(sn->dstg, snapshot_check, snapshot_sync, - os, sn, 3); + + soa = kmem_zalloc(sizeof (*soa), KM_PUSHPAGE); + soa->soa_saa = saa; + soa->soa_longname = snapname; + soa->soa_snapname = strchr(snapname, '@') + 1; + + dsl_sync_task_create(saa->saa_dstg, snapshot_check, snapshot_sync, + os, soa, 3); return (0); } +/* + * The snapshots must all be in the same pool. + */ int -dmu_objset_snapshot(char *fsname, char *snapname, char *tag, - nvlist_t *props, boolean_t recursive, boolean_t temporary, int cleanup_fd) +dmu_objset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) { dsl_sync_task_t *dst; - struct snaparg *sn; + snapallarg_t saa = { 0 }; spa_t *spa; - minor_t minor; + int rv = 0; int err; + nvpair_t *pair; - sn = kmem_alloc(sizeof (struct snaparg), KM_SLEEP); - (void) strcpy(sn->failed, fsname); + pair = nvlist_next_nvpair(snaps, NULL); + if (pair == NULL) + return (0); - err = spa_open(fsname, &spa, FTAG); - if (err) { - kmem_free(sn, sizeof (struct snaparg)); + err = spa_open(nvpair_name(pair), &spa, FTAG); + if (err) return (err); + saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); + saa.saa_props = props; + saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); + + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + err = snapshot_one_impl(nvpair_name(pair), &saa); + if (err != 0) { + if (errors != NULL) { + fnvlist_add_int32(errors, + nvpair_name(pair), err); + } + rv = err; + } } - if (temporary) { - if (cleanup_fd < 0) { - spa_close(spa, FTAG); - return (EINVAL); - } - if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) { - spa_close(spa, FTAG); - return (err); + /* + * If any call to snapshot_one_impl() failed, don't execute the + * sync task. The error handling code below will clean up the + * snaponearg_t from any successful calls to + * snapshot_one_impl(). 
+ */ + if (rv == 0) + err = dsl_sync_task_group_wait(saa.saa_dstg); + if (err != 0) + rv = err; + + for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst; + dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) { + objset_t *os = dst->dst_arg1; + snaponearg_t *soa = dst->dst_arg2; + if (dst->dst_err != 0) { + if (errors != NULL) { + fnvlist_add_int32(errors, + soa->soa_longname, dst->dst_err); + } + rv = dst->dst_err; } + + if (saa.saa_needsuspend) + zil_resume(dmu_objset_zil(os)); + dmu_objset_rele(os, &saa); + kmem_free(soa, sizeof (*soa)); } - sn->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - sn->snapname = snapname; - sn->htag = tag; - sn->props = props; - sn->recursive = recursive; - sn->needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); - sn->temporary = temporary; - sn->ha = NULL; - sn->newds = NULL; - - if (recursive) { - err = dmu_objset_find(fsname, - dmu_objset_snapshot_one, sn, DS_FIND_CHILDREN); - } else { - err = dmu_objset_snapshot_one(fsname, sn); + dsl_sync_task_group_destroy(saa.saa_dstg); + spa_close(spa, FTAG); + return (rv); +} + +int +dmu_objset_snapshot_one(const char *fsname, const char *snapname) +{ + int err; + char *longsnap = kmem_asprintf("%s@%s", fsname, snapname); + nvlist_t *snaps = fnvlist_alloc(); + + fnvlist_add_boolean(snaps, longsnap); + err = dmu_objset_snapshot(snaps, NULL, NULL); + fnvlist_free(snaps); + strfree(longsnap); + return (err); +} + +int +dmu_objset_snapshot_tmp(const char *snapname, const char *tag, int cleanup_fd) +{ + dsl_sync_task_t *dst; + snapallarg_t saa = { 0 }; + spa_t *spa; + minor_t minor; + int err; + + err = spa_open(snapname, &spa, FTAG); + if (err) + return (err); + saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); + saa.saa_htag = tag; + saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); + saa.saa_temporary = B_TRUE; + + if (cleanup_fd < 0) { + spa_close(spa, FTAG); + return (EINVAL); + } + if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) { + spa_close(spa, FTAG); + return (err); } + err = snapshot_one_impl(snapname, &saa); + if (err == 0) - err = dsl_sync_task_group_wait(sn->dstg); + err = dsl_sync_task_group_wait(saa.saa_dstg); - for (dst = list_head(&sn->dstg->dstg_tasks); dst; - dst = list_next(&sn->dstg->dstg_tasks, dst)) { + for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst; + dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) { objset_t *os = dst->dst_arg1; - dsl_dataset_t *ds = os->os_dsl_dataset; - if (dst->dst_err) { - dsl_dataset_name(ds, sn->failed); - } else if (temporary) { - dsl_register_onexit_hold_cleanup(sn->newds, tag, minor); - } - if (sn->needsuspend) + dsl_register_onexit_hold_cleanup(saa.saa_newds, tag, minor); + if (saa.saa_needsuspend) zil_resume(dmu_objset_zil(os)); - dmu_objset_rele(os, sn); + dmu_objset_rele(os, &saa); } - if (err) - (void) strcpy(fsname, sn->failed); - if (temporary) - zfs_onexit_fd_rele(cleanup_fd); - dsl_sync_task_group_destroy(sn->dstg); + zfs_onexit_fd_rele(cleanup_fd); + dsl_sync_task_group_destroy(saa.saa_dstg); spa_close(spa, FTAG); - kmem_free(sn, sizeof (struct snaparg)); return (err); } + static void dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx) { @@ -1159,7 +1219,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx); list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff]; - while ((dr = list_head(list)) != NULL) { + while ((dr = list_head(list))) { ASSERT(dr->dr_dbuf->db_level == 0); list_remove(list, dr); if (dr->dr_zio) @@ -1219,7 
+1279,7 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os)); - while ((dn = list_head(list)) != NULL) { + while ((dn = list_head(list))) { int flags; ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object)); ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE || diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 54e75971d117..6552e1d9d72d 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -416,9 +416,48 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, return (err); } +/* + * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. + * For example, they could both be snapshots of the same filesystem, and + * 'earlier' is before 'later'. Or 'earlier' could be the origin of + * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's + * filesystem. Or 'earlier' could be the origin's origin. + */ +static boolean_t +is_before(dsl_dataset_t *later, dsl_dataset_t *earlier) +{ + dsl_pool_t *dp = later->ds_dir->dd_pool; + int error; + boolean_t ret; + dsl_dataset_t *origin; + + if (earlier->ds_phys->ds_creation_txg >= + later->ds_phys->ds_creation_txg) + return (B_FALSE); + + if (later->ds_dir == earlier->ds_dir) + return (B_TRUE); + if (!dsl_dir_is_clone(later->ds_dir)) + return (B_FALSE); + + rw_enter(&dp->dp_config_rwlock, RW_READER); + if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) { + rw_exit(&dp->dp_config_rwlock); + return (B_TRUE); + } + error = dsl_dataset_hold_obj(dp, + later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); + rw_exit(&dp->dp_config_rwlock); + if (error != 0) + return (B_FALSE); + ret = is_before(origin, earlier); + dsl_dataset_rele(origin, FTAG); + return (ret); +} + int -dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, - int outfd, vnode_t *vp, offset_t *off) +dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, + offset_t *off) { dsl_dataset_t *ds = tosnap->os_dsl_dataset; dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; @@ -431,30 +470,13 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, if (ds->ds_phys->ds_next_snap_obj == 0) return (EINVAL); - /* fromsnap must be an earlier snapshot from the same fs as tosnap */ - if (fromds && (ds->ds_dir != fromds->ds_dir || - fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) + /* + * fromsnap must be an earlier snapshot from the same fs as tosnap, + * or the origin's fs. 
+ */ + if (fromds != NULL && !is_before(ds, fromds)) return (EXDEV); - if (fromorigin) { - dsl_pool_t *dp = ds->ds_dir->dd_pool; - - if (fromsnap) - return (EINVAL); - - if (dsl_dir_is_clone(ds->ds_dir)) { - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); - rw_exit(&dp->dp_config_rwlock); - if (err) - return (err); - } else { - fromorigin = B_FALSE; - } - } - - drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); drr->drr_type = DRR_BEGIN; drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; @@ -479,7 +501,7 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, drr->drr_u.drr_begin.drr_creation_time = ds->ds_phys->ds_creation_time; drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type; - if (fromorigin) + if (fromds != NULL && ds->ds_dir != fromds->ds_dir) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) @@ -491,8 +513,6 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, if (fromds) fromtxg = fromds->ds_phys->ds_creation_txg; - if (fromorigin) - dsl_dataset_rele(fromds, FTAG); dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); @@ -550,8 +570,7 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, } int -dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, - uint64_t *sizep) +dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep) { dsl_dataset_t *ds = tosnap->os_dsl_dataset; dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; @@ -563,27 +582,13 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, if (ds->ds_phys->ds_next_snap_obj == 0) return (EINVAL); - /* fromsnap must be an earlier snapshot from the same fs as tosnap */ - if (fromds && (ds->ds_dir != fromds->ds_dir || - fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) + /* + * fromsnap must be an earlier snapshot from the same fs as tosnap, + * or the origin's fs. + */ + if (fromds != NULL && !is_before(ds, fromds)) return (EXDEV); - if (fromorigin) { - if (fromsnap) - return (EINVAL); - - if (dsl_dir_is_clone(ds->ds_dir)) { - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); - rw_exit(&dp->dp_config_rwlock); - if (err) - return (err); - } else { - fromorigin = B_FALSE; - } - } - /* Get uncompressed size estimate of changed data. 
*/ if (fromds == NULL) { size = ds->ds_phys->ds_uncompressed_bytes; @@ -591,8 +596,6 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, uint64_t used, comp; err = dsl_dataset_space_written(fromds, ds, &used, &comp, &size); - if (fromorigin) - dsl_dataset_rele(fromds, FTAG); if (err) return (err); } @@ -690,8 +693,7 @@ recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx) rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx); } - spa_history_log_internal(LOG_DS_REPLAY_FULL_SYNC, - dd->dd_pool->dp_spa, tx, "dataset = %lld", dsobj); + spa_history_log_internal_ds(rbsa->ds, "receive new", tx, ""); } /* ARGSUSED */ @@ -792,8 +794,7 @@ recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx) rbsa->ds = cds; - spa_history_log_internal(LOG_DS_REPLAY_INC_SYNC, - dp->dp_spa, tx, "dataset = %lld", dsobj); + spa_history_log_internal_ds(cds, "receive over existing", tx, ""); } static boolean_t @@ -1604,6 +1605,7 @@ recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + spa_history_log_internal_ds(ds, "finished receiving", tx, ""); } static int diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index fd714135a853..30867f9d76cb 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -64,7 +64,7 @@ dmu_tx_create_dd(dsl_dir_t *dd) { dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_PUSHPAGE); tx->tx_dir = dd; - if (dd) + if (dd != NULL) tx->tx_pool = dd->dd_pool; list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t), offsetof(dmu_tx_hold_t, txh_node)); diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index d9e8bd30fd14..2eca2b2044e2 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -907,7 +907,8 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, * The snapshots must all be in the same pool. */ int -dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed) +dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, + nvlist_t *errlist) { int err; dsl_sync_task_t *dst; @@ -942,7 +943,7 @@ dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed) } else if (err == ENOENT) { err = 0; } else { - (void) strcpy(failed, nvpair_name(pair)); + fnvlist_add_int32(errlist, nvpair_name(pair), err); break; } } @@ -956,10 +957,12 @@ dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed) dsl_dataset_t *ds = dsda->ds; /* - * Return the file system name that triggered the error + * Return the snapshots that triggered the error. 
*/ - if (dst->dst_err) { - dsl_dataset_name(ds, failed); + if (dst->dst_err != 0) { + char name[ZFS_MAXNAMELEN]; + dsl_dataset_name(ds, name); + fnvlist_add_int32(errlist, name, dst->dst_err); } ASSERT3P(dsda->rm_origin, ==, NULL); dsl_dataset_disown(ds, dstg); @@ -1038,7 +1041,6 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) dsl_dir_t *dd; uint64_t obj; struct dsl_ds_destroyarg dsda = { 0 }; - dsl_dataset_t *dummy_ds; dsda.ds = ds; @@ -1058,9 +1060,6 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) } dd = ds->ds_dir; - dummy_ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); - dummy_ds->ds_dir = dd; - dummy_ds->ds_object = ds->ds_object; if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds), &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { @@ -1072,11 +1071,11 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) dsl_dataset_destroy_begin_check, dsl_dataset_destroy_begin_sync, ds, NULL, 0); if (err) - goto out_free; + goto out; err = dmu_objset_from_ds(ds, &os); if (err) - goto out_free; + goto out; /* * Remove all objects while in the open context so that @@ -1091,7 +1090,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) (void) dmu_free_object(os, obj); } if (err != ESRCH) - goto out_free; + goto out; /* * Sync out all in-flight IO. @@ -1118,7 +1117,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) rw_exit(&dd->dd_pool->dp_config_rwlock); if (err) - goto out_free; + goto out; /* * Blow away the dsl_dir + head dataset. @@ -1134,7 +1133,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) err = dsl_dataset_origin_rm_prep(&dsda, tag); if (err) { dsl_dir_close(dd, FTAG); - goto out_free; + goto out; } } @@ -1142,7 +1141,7 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) dsl_sync_task_create(dstg, dsl_dataset_destroy_check, dsl_dataset_destroy_sync, &dsda, tag, 0); dsl_sync_task_create(dstg, dsl_dir_destroy_check, - dsl_dir_destroy_sync, dummy_ds, FTAG, 0); + dsl_dir_destroy_sync, dd, FTAG, 0); err = dsl_sync_task_group_wait(dstg); dsl_sync_task_group_destroy(dstg); @@ -1166,8 +1165,6 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) if (err) dsl_dir_close(dd, FTAG); -out_free: - kmem_free(dummy_ds, sizeof (dsl_dataset_t)); out: dsl_dataset_disown(ds, tag); return (err); @@ -1333,14 +1330,12 @@ static void dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; - dsl_pool_t *dp = ds->ds_dir->dd_pool; /* Mark it as inconsistent on-disk, in case we crash */ dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; - spa_history_log_internal(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, - "dataset = %llu", ds->ds_object); + spa_history_log_internal_ds(ds, "destroy begin", tx, ""); } static int @@ -1665,9 +1660,13 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; + spa_history_log_internal_ds(ds, "defer_destroy", tx, ""); return; } + /* We need to log before removing it from the namespace. 
*/ + spa_history_log_internal_ds(ds, "destroy", tx, ""); + /* signal any waiters that this dataset is going away */ mutex_enter(&ds->ds_lock); ds->ds_owner = dsl_reaper; @@ -1965,8 +1964,6 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) dsl_dataset_rele(ds_prev, FTAG); spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); - spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx, - "dataset = %llu", ds->ds_object); if (ds->ds_phys->ds_next_clones_obj != 0) { ASSERTV(uint64_t count); @@ -2014,7 +2011,7 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) return (ENOSPC); /* - * Propogate any reserved space for this snapshot to other + * Propagate any reserved space for this snapshot to other * snapshot checks in this sync group. */ if (asize > 0) @@ -2024,10 +2021,9 @@ dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) } int -dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname, + dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - const char *snapname = arg2; int err; uint64_t value; @@ -2039,7 +2035,7 @@ dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) return (EAGAIN); /* - * Check for conflicting name snapshot name. + * Check for conflicting snapshot name. */ err = dsl_dataset_snap_lookup(ds, snapname, &value); if (err == 0) @@ -2063,10 +2059,9 @@ dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) } void -dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname, + dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - const char *snapname = arg2; dsl_pool_t *dp = ds->ds_dir->dd_pool; dmu_buf_t *dbuf; dsl_dataset_phys_t *dsphys; @@ -2172,8 +2167,7 @@ dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_dir_snap_cmtime_update(ds->ds_dir); - spa_history_log_internal(LOG_DS_SNAPSHOT, dp->dp_spa, tx, - "dataset = %llu", dsobj); + spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, ""); } void @@ -2259,7 +2253,20 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { uint64_t refd, avail, uobjs, aobjs, ratio; - dsl_dir_stats(ds->ds_dir, nv); + ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : + (ds->ds_phys->ds_uncompressed_bytes * 100 / + ds->ds_phys->ds_compressed_bytes); + + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); + + if (dsl_dataset_is_snapshot(ds)) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, + ds->ds_phys->ds_unique_bytes); + get_clones_stat(ds, nv); + } else { + dsl_dir_stats(ds->ds_dir, nv); + } dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); @@ -2305,22 +2312,6 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) } } - ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : - (ds->ds_phys->ds_uncompressed_bytes * 100 / - ds->ds_phys->ds_compressed_bytes); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); - - if (ds->ds_phys->ds_next_snap_obj) { - /* - * This is a snapshot; override the dd's space used with - * our unique space and compression ratio. 
- */ - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, - ds->ds_phys->ds_unique_bytes); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); - - get_clones_stat(ds, nv); - } } void @@ -2329,27 +2320,25 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; stat->dds_guid = ds->ds_phys->ds_guid; - if (ds->ds_phys->ds_next_snap_obj) { + stat->dds_origin[0] = '\0'; + if (dsl_dataset_is_snapshot(ds)) { stat->dds_is_snapshot = B_TRUE; stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; } else { stat->dds_is_snapshot = B_FALSE; stat->dds_num_clones = 0; - } - /* clone origin is really a dsl_dir thing... */ - rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); - if (dsl_dir_is_clone(ds->ds_dir)) { - dsl_dataset_t *ods; + rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); + if (dsl_dir_is_clone(ds->ds_dir)) { + dsl_dataset_t *ods; - VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); - dsl_dataset_name(ods, stat->dds_origin); - dsl_dataset_drop_ref(ods, FTAG); - } else { - stat->dds_origin[0] = '\0'; + VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); + dsl_dataset_name(ods, stat->dds_origin); + dsl_dataset_drop_ref(ods, FTAG); + } + rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); } - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); } uint64_t @@ -2466,8 +2455,8 @@ dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) ds->ds_snapname, 8, 1, &ds->ds_object, tx); ASSERT0(err); - spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, - "dataset = %llu", ds->ds_object); + spa_history_log_internal_ds(ds, "rename", tx, + "-> @%s", newsnapname); dsl_dataset_rele(hds, FTAG); } @@ -2946,8 +2935,7 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) origin_ds->ds_phys->ds_unique_bytes = pa->unique; /* log history record */ - spa_history_log_internal(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, - "dataset = %llu", hds->ds_object); + spa_history_log_internal_ds(hds, "promote", tx, ""); dsl_dir_close(odd, FTAG); } @@ -3306,6 +3294,9 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx) csa->ohds->ds_phys->ds_deadlist_obj); dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx); + + spa_history_log_internal_ds(csa->cds, "clone swap", tx, + "parent=%s", csa->ohds->ds_dir->dd_myname); } /* @@ -3463,6 +3454,9 @@ dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) if (ds->ds_quota != effective_value) { dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_quota = effective_value; + + spa_history_log_internal_ds(ds, "set refquota", tx, + "refquota=%lld", (longlong_t)ds->ds_quota); } } @@ -3566,6 +3560,9 @@ dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); mutex_exit(&ds->ds_dir->dd_lock); + + spa_history_log_internal_ds(ds, "set refreservation", tx, + "refreservation=%lld", (longlong_t)effective_value); } int @@ -3631,7 +3628,7 @@ dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct dsl_ds_holdarg *ha = arg2; - char *htag = ha->htag; + const char *htag = ha->htag; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; int error = 0; @@ -3665,7 +3662,7 @@ dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct 
dsl_ds_holdarg *ha = arg2; - char *htag = ha->htag; + const char *htag = ha->htag; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t now = gethrestime_sec(); @@ -3693,9 +3690,9 @@ dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) htag, &now, tx)); } - spa_history_log_internal(LOG_DS_USER_HOLD, - dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag, - (int)ha->temphold, ds->ds_object); + spa_history_log_internal_ds(ds, "hold", tx, + "tag = %s temp = %d holds now = %llu", + htag, (int)ha->temphold, ds->ds_userrefs); } static int @@ -3902,7 +3899,6 @@ dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj; - uint64_t dsobj = ds->ds_object; uint64_t refs; int error; @@ -3914,11 +3910,6 @@ dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) VERIFY(error == 0 || error == ENOENT); zapobj = ds->ds_phys->ds_userrefs_obj; VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx)); - - spa_history_log_internal(LOG_DS_USER_RELEASE, - dp->dp_spa, tx, "<%s> %lld dataset = %llu", - ra->htag, (longlong_t)refs, dsobj); - if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 && DS_IS_DEFER_DESTROY(ds)) { struct dsl_ds_destroyarg dsda = {0}; diff --git a/module/zfs/dsl_deleg.c b/module/zfs/dsl_deleg.c index 294932c450bf..48c261e63806 100644 --- a/module/zfs/dsl_deleg.c +++ b/module/zfs/dsl_deleg.c @@ -181,10 +181,8 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(zap_update(mos, jumpobj, perm, 8, 1, &n, tx) == 0); - spa_history_log_internal(LOG_DS_PERM_UPDATE, - dd->dd_pool->dp_spa, tx, - "%s %s dataset = %llu", whokey, perm, - dd->dd_phys->dd_head_dataset_obj); + spa_history_log_internal_dd(dd, "permission update", tx, + "%s %s", whokey, perm); } } } @@ -213,10 +211,8 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) (void) zap_remove(mos, zapobj, whokey, tx); VERIFY(0 == zap_destroy(mos, jumpobj, tx)); } - spa_history_log_internal(LOG_DS_PERM_WHO_REMOVE, - dd->dd_pool->dp_spa, tx, - "%s dataset = %llu", whokey, - dd->dd_phys->dd_head_dataset_obj); + spa_history_log_internal_dd(dd, "permission who remove", + tx, "%s", whokey); continue; } @@ -234,10 +230,8 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(0 == zap_destroy(mos, jumpobj, tx)); } - spa_history_log_internal(LOG_DS_PERM_REMOVE, - dd->dd_pool->dp_spa, tx, - "%s %s dataset = %llu", whokey, perm, - dd->dd_phys->dd_head_dataset_obj); + spa_history_log_internal_dd(dd, "permission remove", tx, + "%s %s", whokey, perm); } } } @@ -533,12 +527,10 @@ dsl_load_user_sets(objset_t *mos, uint64_t zapobj, avl_tree_t *avl, } /* - * Check if user has requested permission. If descendent is set, must have - * descendent perms. + * Check if user has requested permission. */ int -dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm, - cred_t *cr) +dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr) { dsl_dir_t *dd; dsl_pool_t *dp; @@ -559,7 +551,7 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm, SPA_VERSION_DELEGATED_PERMS) return (EPERM); - if (dsl_dataset_is_snapshot(ds) || descendent) { + if (dsl_dataset_is_snapshot(ds)) { /* * Snapshots are treated as descendents only, * local permissions do not apply. 
@@ -652,7 +644,7 @@ dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr) if (error) return (error); - error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); + error = dsl_deleg_access_impl(ds, perm, cr); dsl_dataset_rele(ds, FTAG); return (error); diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 69f68c209a10..45c73c363e57 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -39,8 +40,8 @@ #include "zfs_namecheck.h" static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); -static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx); - +static void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, + uint64_t value, dmu_tx_t *tx); /* ARGSUSED */ static void @@ -449,8 +450,7 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, int dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; + dsl_dir_t *dd = arg1; dsl_pool_t *dp = dd->dd_pool; objset_t *mos = dp->dp_meta_objset; int err; @@ -481,24 +481,19 @@ dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) void dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; + dsl_dir_t *dd = arg1; objset_t *mos = dd->dd_pool->dp_meta_objset; - dsl_prop_setarg_t psa; - uint64_t value = 0; uint64_t obj; dd_used_t t; ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)); ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); - /* Remove our reservation. */ - dsl_prop_setarg_init_uint64(&psa, "reservation", - (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), - &value); - psa.psa_effective_value = 0; /* predict default value */ - - dsl_dir_set_reservation_sync(ds, &psa, tx); + /* + * Remove our reservation. The impl() routine avoids setting the + * actual property, which would require the (already destroyed) ds. 
+ */ + dsl_dir_set_reservation_sync_impl(dd, 0, tx); ASSERT0(dd->dd_phys->dd_used_bytes); ASSERT0(dd->dd_phys->dd_reserved); @@ -1062,6 +1057,9 @@ dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) mutex_enter(&dd->dd_lock); dd->dd_phys->dd_quota = effective_value; mutex_exit(&dd->dd_lock); + + spa_history_log_internal_dd(dd, "set quota", tx, + "quota=%lld", (longlong_t)effective_value); } int @@ -1148,25 +1146,17 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) } static void -dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; - dsl_prop_setarg_t *psa = arg2; - uint64_t effective_value = psa->psa_effective_value; uint64_t used; int64_t delta; - dsl_prop_set_sync(ds, psa, tx); - DSL_PROP_CHECK_PREDICTION(dd, psa); - dmu_buf_will_dirty(dd->dd_dbuf, tx); mutex_enter(&dd->dd_lock); used = dd->dd_phys->dd_used_bytes; - delta = MAX(used, effective_value) - - MAX(used, dd->dd_phys->dd_reserved); - dd->dd_phys->dd_reserved = effective_value; + delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved); + dd->dd_phys->dd_reserved = value; if (dd->dd_parent != NULL) { /* Roll up this additional usage into our ancestors */ @@ -1176,6 +1166,23 @@ dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) mutex_exit(&dd->dd_lock); } +static void +dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = arg1; + dsl_dir_t *dd = ds->ds_dir; + dsl_prop_setarg_t *psa = arg2; + uint64_t value = psa->psa_effective_value; + + dsl_prop_set_sync(ds, psa, tx); + DSL_PROP_CHECK_PREDICTION(dd, psa); + + dsl_dir_set_reservation_sync_impl(dd, value, tx); + + spa_history_log_internal_dd(dd, "set reservation", tx, + "reservation=%lld", (longlong_t)value); + } + int dsl_dir_set_reservation(const char *ddname, zprop_source_t source, uint64_t reservation) @@ -1294,9 +1301,15 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_pool_t *dp = dd->dd_pool; objset_t *mos = dp->dp_meta_objset; int err; + char namebuf[MAXNAMELEN]; ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2); + /* Log this before we change the name. */ + dsl_dir_name(ra->newparent, namebuf); + spa_history_log_internal_dd(dd, "rename", tx, + "-> %s/%s", namebuf, ra->mynewname); + if (ra->newparent != dd->dd_parent) { dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, -dd->dd_phys->dd_used_bytes, @@ -1336,8 +1349,6 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) dd->dd_myname, 8, 1, &dd->dd_object, tx); ASSERT0(err); - spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, - tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj); } int diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c index e44a94853929..153420ccf5f1 100644 --- a/module/zfs/dsl_prop.c +++ b/module/zfs/dsl_prop.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -704,11 +705,9 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) } } - spa_history_log_internal((source == ZPROP_SRC_NONE || - source == ZPROP_SRC_INHERITED) ? LOG_DS_INHERIT : - LOG_DS_PROPSET, ds->ds_dir->dd_pool->dp_spa, tx, - "%s=%s dataset = %llu", propname, - (valstr == NULL ? "" : valstr), ds->ds_object); + spa_history_log_internal_ds(ds, (source == ZPROP_SRC_NONE || + source == ZPROP_SRC_INHERITED) ? 
"inherit" : "set", tx, + "%s=%s", propname, (valstr == NULL ? "" : valstr)); if (tbuf != NULL) kmem_free(tbuf, ZAP_MAXVALUELEN); @@ -757,24 +756,6 @@ dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) } } -void -dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val, - dmu_tx_t *tx) -{ - objset_t *mos = dd->dd_pool->dp_meta_objset; - uint64_t zapobj = dd->dd_phys->dd_props_zapobj; - - ASSERT(dmu_tx_is_syncing(tx)); - - VERIFY(0 == zap_update(mos, zapobj, name, sizeof (val), 1, &val, tx)); - - dsl_prop_changed_notify(dd->dd_pool, dd->dd_object, name, val, TRUE); - - spa_history_log_internal(LOG_DS_PROPSET, dd->dd_pool->dp_spa, tx, - "%s=%llu dataset = %llu", name, (u_longlong_t)val, - dd->dd_phys->dd_head_dataset_obj); -} - int dsl_prop_set(const char *dsname, const char *propname, zprop_source_t source, int intsz, int numints, const void *buf) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 34a4f0382d9a..90ca7b256606 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -224,7 +224,7 @@ dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_scan_sync_state(scn, tx); - spa_history_log_internal(LOG_POOL_SCAN, spa, tx, + spa_history_log_internal(spa, "scan setup", tx, "func=%u mintxg=%llu maxtxg=%llu", *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg); } @@ -273,7 +273,7 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) else scn->scn_phys.scn_state = DSS_CANCELED; - spa_history_log_internal(LOG_POOL_SCAN_DONE, spa, tx, + spa_history_log_internal(spa, "scan done", tx, "complete=%u", complete); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { diff --git a/module/zfs/dsl_synctask.c b/module/zfs/dsl_synctask.c index 75eb507b949c..2ed47fe0c95c 100644 --- a/module/zfs/dsl_synctask.c +++ b/module/zfs/dsl_synctask.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -85,17 +86,17 @@ dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg) /* Do a preliminary error check. */ dstg->dstg_err = 0; +#ifdef ZFS_DEBUG + /* + * Only check half the time, otherwise, the sync-context + * check will almost never fail. + */ + if (spa_get_random(2) == 0) + goto skip; +#endif rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER); for (dst = list_head(&dstg->dstg_tasks); dst; dst = list_next(&dstg->dstg_tasks, dst)) { -#ifdef ZFS_DEBUG - /* - * Only check half the time, otherwise, the sync-context - * check will almost never fail. - */ - if (spa_get_random(2) == 0) - continue; -#endif dst->dst_err = dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx); if (dst->dst_err) @@ -107,6 +108,9 @@ dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg) dmu_tx_commit(tx); return (dstg->dstg_err); } +#ifdef ZFS_DEBUG +skip: +#endif /* * We don't generally have many sync tasks, so pay the price of @@ -230,9 +234,6 @@ dsl_sync_task_do_nowait(dsl_pool_t *dp, { dsl_sync_task_group_t *dstg; - if (!spa_writeable(dp->dp_spa)) - return; - dstg = dsl_sync_task_group_create(dp); dsl_sync_task_create(dstg, checkfunc, syncfunc, arg1, arg2, blocks_modified); diff --git a/module/zfs/rrwlock.c b/module/zfs/rrwlock.c index 4cef53f95132..7f9290bd44c1 100644 --- a/module/zfs/rrwlock.c +++ b/module/zfs/rrwlock.c @@ -22,6 +22,9 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. 
+ */ #include #include @@ -262,3 +265,13 @@ rrw_held(rrwlock_t *rrl, krw_t rw) return (held); } + +void +rrw_tsd_destroy(void *arg) +{ + rrw_node_t *rn = arg; + if (rn != NULL) { + panic("thread %p terminating with rrw lock %p held", + (void *)curthread, (void *)rn->rn_rrl); + } +} diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 65f78b7846af..7c37ca426d4a 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -731,8 +731,8 @@ spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx) vdev_config_dirty(rvd); spa_config_exit(spa, SCL_STATE, FTAG); - spa_history_log_internal(LOG_POOL_GUID_CHANGE, spa, tx, - "old=%lld new=%lld", oldguid, *newguid); + spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu", + oldguid, *newguid); } /* @@ -2715,6 +2715,12 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, vdev_resilver_needed(rvd, NULL, NULL)) spa_async_request(spa, SPA_ASYNC_RESILVER); + /* + * Log the fact that we booted up (so that we can detect if + * we rebooted in the middle of an operation). + */ + spa_history_log_version(spa, "open"); + /* * Delete any inconsistent datasets. */ @@ -3399,7 +3405,7 @@ spa_l2cache_drop(spa_t *spa) */ int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, - const char *history_str, nvlist_t *zplprops) + nvlist_t *zplprops) { spa_t *spa; char *altroot = NULL; @@ -3620,9 +3626,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa_config_sync(spa, B_FALSE, B_TRUE); - if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) - (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); - spa_history_log_version(spa, LOG_POOL_CREATE); + spa_history_log_version(spa, "create"); spa->spa_minref = refcount_count(&spa->spa_refcount); @@ -3824,7 +3828,6 @@ spa_import_rootpool(char *devpath, char *devid) } error = 0; - spa_history_log_version(spa, LOG_POOL_IMPORT); out: spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_free(rvd); @@ -3886,7 +3889,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_config_sync(spa, B_FALSE, B_TRUE); mutex_exit(&spa_namespace_lock); - spa_history_log_version(spa, LOG_POOL_IMPORT); + spa_history_log_version(spa, "import"); return (0); } @@ -4017,7 +4020,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); mutex_exit(&spa_namespace_lock); - spa_history_log_version(spa, LOG_POOL_IMPORT); + spa_history_log_version(spa, "import"); #ifdef _KERNEL zvol_create_minors(pool); @@ -4560,7 +4563,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) */ (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0); - spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL, + spa_history_log_internal(spa, "vdev attach", NULL, "%s vdev=%s %s vdev=%s", replacing && newvd_isspare ? "spare in" : replacing ? 
"replace" : "attach", newvdpath, @@ -4778,7 +4781,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) error = spa_vdev_exit(spa, vd, txg, 0); - spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL, + spa_history_log_internal(spa, "detach", NULL, "vdev=%s", vdpath); spa_strfree(vdpath); @@ -5047,9 +5050,8 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, if (vml[c] != NULL) { vdev_split(vml[c]); if (error == 0) - spa_history_log_internal(LOG_POOL_VDEV_DETACH, - spa, tx, "vdev=%s", - vml[c]->vdev_path); + spa_history_log_internal(spa, "detach", tx, + "vdev=%s", vml[c]->vdev_path); vdev_free(vml[c]); } } @@ -5064,8 +5066,8 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, zio_handle_panic_injection(spa, FTAG, 3); /* split is complete; log a history record */ - spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL, - "split new pool %s from pool %s", newname, spa_name(spa)); + spa_history_log_internal(newspa, "split", NULL, + "from pool %s", spa_name(spa)); kmem_free(vml, children * sizeof (vdev_t *)); @@ -5647,8 +5649,7 @@ spa_async_thread(spa_t *spa) * then log an internal history event. */ if (new_space != old_space) { - spa_history_log_internal(LOG_POOL_VDEV_ONLINE, - spa, NULL, + spa_history_log_internal(spa, "vdev online", NULL, "pool '%s' size: %llu(+%llu)", spa_name(spa), new_space, new_space - old_space); } @@ -5892,6 +5893,7 @@ spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx) spa->spa_uberblock.ub_version = version; vdev_config_dirty(spa->spa_root_vdev); + spa_history_log_internal(spa, "set", tx, "version=%lld", version); } /* @@ -5927,6 +5929,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature)); spa_feature_enable(spa, feature, tx); + spa_history_log_internal(spa, "set", tx, + "%s=enabled", nvpair_name(elem)); break; case ZPOOL_PROP_VERSION: @@ -5966,6 +5970,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) */ if (tx->tx_txg != TXG_INITIAL) vdev_config_dirty(spa->spa_root_vdev); + spa_history_log_internal(spa, "set", tx, + "%s=%s", nvpair_name(elem), strval); break; default: /* @@ -5988,7 +5994,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(zap_update(mos, spa->spa_pool_props_object, propname, 1, strlen(strval) + 1, strval, tx) == 0); - + spa_history_log_internal(spa, "set", tx, + "%s=%s", nvpair_name(elem), strval); } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { VERIFY(nvpair_value_uint64(elem, &intval) == 0); @@ -6000,6 +6007,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) VERIFY(zap_update(mos, spa->spa_pool_props_object, propname, 8, 1, &intval, tx) == 0); + spa_history_log_internal(spa, "set", tx, + "%s=%lld", nvpair_name(elem), intval); } else { ASSERT(0); /* not allowed */ } @@ -6028,13 +6037,6 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) } } - /* log internal history if this is not a zpool create */ - if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && - tx->tx_txg != TXG_INITIAL) { - spa_history_log_internal(LOG_POOL_PROPSET, - spa, tx, "%s %lld %s", - nvpair_name(elem), intval, spa_name(spa)); - } } mutex_exit(&spa->spa_props_lock); diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c index 9fb75f391b9a..79d48620c9e1 100644 --- a/module/zfs/spa_history.c +++ b/module/zfs/spa_history.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. 
All rights reserved. */ #include @@ -30,9 +30,12 @@ #include #include #include +#include +#include #include #include #include +#include #include "zfs_comutil.h" #ifdef _KERNEL #include @@ -185,7 +188,7 @@ spa_history_zone(void) return (curproc->p_zone->zone_name); #endif #else - return ("global"); + return (NULL); #endif } @@ -197,14 +200,12 @@ static void spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) { spa_t *spa = arg1; - history_arg_t *hap = arg2; - const char *history_str = hap->ha_history_str; + nvlist_t *nvl = arg2; objset_t *mos = spa->spa_meta_objset; dmu_buf_t *dbp; spa_history_phys_t *shpp; size_t reclen; uint64_t le_len; - nvlist_t *nvrecord; char *record_packed = NULL; int ret; @@ -234,46 +235,36 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) } #endif - VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME, - gethrestime_sec()) == 0); - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0); - if (hap->ha_zone != NULL) - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE, - hap->ha_zone) == 0); + fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec()); #ifdef _KERNEL - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST, - utsname.nodename) == 0); + fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname.nodename); #endif - if (hap->ha_log_type == LOG_CMD_POOL_CREATE || - hap->ha_log_type == LOG_CMD_NORMAL) { - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD, - history_str) == 0); - - zfs_dbgmsg("command: %s", history_str); - } else { - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT, - hap->ha_event) == 0); - VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG, - tx->tx_txg) == 0); - VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR, - history_str) == 0); - - zfs_dbgmsg("internal %s pool:%s txg:%llu %s", - zfs_history_event_names[hap->ha_event], spa_name(spa), - (longlong_t)tx->tx_txg, history_str); - + if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) { + zfs_dbgmsg("command: %s", + fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD)); + } else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) { + if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) { + zfs_dbgmsg("txg %lld %s %s (id %llu) %s", + fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), + fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME), + fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); + } else { + zfs_dbgmsg("txg %lld %s %s", + fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), + fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); + } + } else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) { + zfs_dbgmsg("ioctl %s", + fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL)); } - VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0); - record_packed = kmem_alloc(reclen, KM_PUSHPAGE); - - VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen, - NV_ENCODE_XDR, KM_PUSHPAGE) == 0); + VERIFY3U(nvlist_pack(nvl, &record_packed, &reclen, NV_ENCODE_NATIVE, + KM_PUSHPAGE), ==, 0); mutex_enter(&spa->spa_history_lock); - if (hap->ha_log_type == LOG_CMD_POOL_CREATE) - VERIFY(shpp->sh_eof == shpp->sh_pool_create_len); /* write out the packed length as little endian */ le_len = LE_64((uint64_t)reclen); @@ -281,33 +272,44 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) if (!ret) ret = spa_history_write(spa, record_packed, reclen, shpp, tx); - if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) { - 
shpp->sh_pool_create_len += sizeof (le_len) + reclen; - shpp->sh_bof = shpp->sh_pool_create_len; + /* The first command is the create, which we keep forever */ + if (ret == 0 && shpp->sh_pool_create_len == 0 && + nvlist_exists(nvl, ZPOOL_HIST_CMD)) { + shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof; } mutex_exit(&spa->spa_history_lock); - nvlist_free(nvrecord); - kmem_free(record_packed, reclen); + fnvlist_pack_free(record_packed, reclen); dmu_buf_rele(dbp, FTAG); - - strfree(hap->ha_history_str); - if (hap->ha_zone != NULL) - strfree(hap->ha_zone); - kmem_free(hap, sizeof (history_arg_t)); + fnvlist_free(nvl); } /* * Write out a history event. */ int -spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what) +spa_history_log(spa_t *spa, const char *msg) +{ + int err; + nvlist_t *nvl; + + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); + + fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg); + err = spa_history_log_nvl(spa, nvl); + fnvlist_free(nvl); + return (err); +} + +int +spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) { - history_arg_t *ha; int err = 0; dmu_tx_t *tx; + nvlist_t *nvarg; - ASSERT(what != LOG_INTERNAL); + if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa)) + return (EINVAL); tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); err = dmu_tx_assign(tx, TXG_WAIT); @@ -316,19 +318,21 @@ spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what) return (err); } - ha = kmem_alloc(sizeof (history_arg_t), KM_PUSHPAGE); - ha->ha_history_str = strdup(history_str); - ha->ha_zone = strdup(spa_history_zone()); - ha->ha_log_type = what; - ha->ha_uid = crgetuid(CRED()); + VERIFY0(nvlist_dup(nvl, &nvarg, KM_PUSHPAGE)); + if (spa_history_zone() != NULL) { + fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE, + spa_history_zone()); + } + fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED())); /* Kick this off asynchronously; errors are ignored. */ dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, - spa_history_log_sync, spa, ha, 0, tx); + spa_history_log_sync, spa, nvarg, 0, tx); dmu_tx_commit(tx); - /* spa_history_log_sync will free ha and strings */ + /* spa_history_log_sync will free nvl */ return (err); + } /* @@ -345,7 +349,7 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) int err; /* - * If the command history doesn't exist (older pool), + * If the command history doesn't exist (older pool), * that's ok, just return ENOENT. */ if (!spa->spa_history) @@ -428,44 +432,54 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) return (err); } +/* + * The nvlist will be consumed by this call. + */ static void -log_internal(history_internal_events_t event, spa_t *spa, +log_internal(nvlist_t *nvl, const char *operation, spa_t *spa, dmu_tx_t *tx, const char *fmt, va_list adx) { - history_arg_t *ha; - va_list adx_copy; + char *msg; + va_list adx1; /* * If this is part of creating a pool, not everything is * initialized yet, so don't bother logging the internal events. + * Likewise if the pool is not writeable. 
*/ - if (tx->tx_txg == TXG_INITIAL) + if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) { + fnvlist_free(nvl); return; + } - ha = kmem_alloc(sizeof (history_arg_t), KM_PUSHPAGE); - va_copy(adx_copy, adx); - ha->ha_history_str = kmem_vasprintf(fmt, adx_copy); - va_end(adx_copy); - ha->ha_log_type = LOG_INTERNAL; - ha->ha_event = event; - ha->ha_zone = NULL; - ha->ha_uid = 0; + va_copy(adx1, adx); + msg = kmem_alloc(vsnprintf(NULL, 0, fmt, adx1) + 1, KM_PUSHPAGE); + va_end(adx1); + va_copy(adx1, adx); + (void) vsprintf(msg, fmt, adx1); + va_end(adx1); + fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg); + strfree(msg); + + fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation); + fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg); if (dmu_tx_is_syncing(tx)) { - spa_history_log_sync(spa, ha, tx); + spa_history_log_sync(spa, nvl, tx); } else { dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, - spa_history_log_sync, spa, ha, 0, tx); + spa_history_log_sync, spa, nvl, 0, tx); } - /* spa_history_log_sync() will free ha and strings */ + /* spa_history_log_sync() will free nvl */ } void -spa_history_log_internal(history_internal_events_t event, spa_t *spa, +spa_history_log_internal(spa_t *spa, const char *operation, dmu_tx_t *tx, const char *fmt, ...) { dmu_tx_t *htx = tx; va_list adx; + nvlist_t *nvl; /* create a tx if we didn't get one */ if (tx == NULL) { @@ -477,7 +491,8 @@ spa_history_log_internal(history_internal_events_t event, spa_t *spa, } va_start(adx, fmt); - log_internal(event, spa, htx, fmt, adx); + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); + log_internal(nvl, operation, spa, htx, fmt, adx); va_end(adx); /* if we didn't get a tx from the caller, commit the one we made */ @@ -486,21 +501,58 @@ spa_history_log_internal(history_internal_events_t event, spa_t *spa, } void -spa_history_log_version(spa_t *spa, history_internal_events_t event) +spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation, + dmu_tx_t *tx, const char *fmt, ...) +{ + va_list adx; + char namebuf[MAXNAMELEN]; + nvlist_t *nvl; + + ASSERT(tx != NULL); + + dsl_dataset_name(ds, namebuf); + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); + fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); + fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object); + + va_start(adx, fmt); + log_internal(nvl, operation, dsl_dataset_get_spa(ds), tx, fmt, adx); + va_end(adx); +} + +void +spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, + dmu_tx_t *tx, const char *fmt, ...) +{ + va_list adx; + char namebuf[MAXNAMELEN]; + nvlist_t *nvl; + + ASSERT(tx != NULL); + + dsl_dir_name(dd, namebuf); + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); + fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); + fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, + dd->dd_phys->dd_head_dataset_obj); + + va_start(adx, fmt); + log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx); + va_end(adx); +} + +void +spa_history_log_version(spa_t *spa, const char *operation) { #ifdef _KERNEL uint64_t current_vers = spa_version(spa); - if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) { - spa_history_log_internal(event, spa, NULL, - "pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s", - (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, - utsname.nodename, utsname.release, utsname.version, - utsname.machine); - } - cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", - event == LOG_POOL_IMPORT ? "imported" : - event == LOG_POOL_CREATE ? 
"created" : "accessed", + spa_history_log_internal(spa, operation, NULL, + "pool version %llu; software version %llu/%d; uts %s %s %s %s", + (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, + utsname.nodename, utsname.release, utsname.version, + utsname.machine); + cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", operation, (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION); #endif } diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index b35f27d194db..a03e1c6948ea 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -632,8 +632,7 @@ zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, goto out; if (error == 0) { - error = dmu_objset_snapshot(dsname, dirname, - NULL, NULL, B_FALSE, B_FALSE, -1); + error = dmu_objset_snapshot_one(dsname, dirname); if (error) goto out; diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index a9184a1b61bd..e64d6a1f04eb 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -28,6 +28,108 @@ * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * ZFS ioctls. + * + * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage + * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool. + * + * There are two ways that we handle ioctls: the legacy way where almost + * all of the logic is in the ioctl callback, and the new way where most + * of the marshalling is handled in the common entry point, zfsdev_ioctl(). + * + * Non-legacy ioctls should be registered by calling + * zfs_ioctl_register() from zfs_ioctl_init(). The ioctl is invoked + * from userland by lzc_ioctl(). + * + * The registration arguments are as follows: + * + * const char *name + * The name of the ioctl. This is used for history logging. If the + * ioctl returns successfully (the callback returns 0), and allow_log + * is true, then a history log entry will be recorded with the input & + * output nvlists. The log entry can be printed with "zpool history -i". + * + * zfs_ioc_t ioc + * The ioctl request number, which userland will pass to ioctl(2). + * The ioctl numbers can change from release to release, because + * the caller (libzfs) must be matched to the kernel. + * + * zfs_secpolicy_func_t *secpolicy + * This function will be called before the zfs_ioc_func_t, to + * determine if this operation is permitted. It should return EPERM + * on failure, and 0 on success. Checks include determining if the + * dataset is visible in this zone, and if the user has either all + * zfs privileges in the zone (SYS_MOUNT), or has been granted permission + * to do this operation on this dataset with "zfs allow". + * + * zfs_ioc_namecheck_t namecheck + * This specifies what to expect in the zfs_cmd_t:zc_name -- a pool + * name, a dataset name, or nothing. If the name is not well-formed, + * the ioctl will fail and the callback will not be called. + * Therefore, the callback can assume that the name is well-formed + * (e.g. is null-terminated, doesn't have more than one '@' character, + * doesn't have invalid characters). + * + * zfs_ioc_poolcheck_t pool_check + * This specifies requirements on the pool state. If the pool does + * not meet them (is suspended or is readonly), the ioctl will fail + * and the callback will not be called. If any checks are specified + * (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME. 
+ * Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED | + * POOL_CHECK_READONLY). + * + * boolean_t smush_outnvlist + * If smush_outnvlist is true, then the output is presumed to be a + * list of errors, and it will be "smushed" down to fit into the + * caller's buffer, by removing some entries and replacing them with a + * single "N_MORE_ERRORS" entry indicating how many were removed. See + * nvlist_smush() for details. If smush_outnvlist is false, and the + * outnvlist does not fit into the userland-provided buffer, then the + * ioctl will fail with ENOMEM. + * + * zfs_ioc_func_t *func + * The callback function that will perform the operation. + * + * The callback should return 0 on success, or an error number on + * failure. If the function fails, the userland ioctl will return -1, + * and errno will be set to the callback's return value. The callback + * will be called with the following arguments: + * + * const char *name + * The name of the pool or dataset to operate on, from + * zfs_cmd_t:zc_name. The 'namecheck' argument specifies the + * expected type (pool, dataset, or none). + * + * nvlist_t *innvl + * The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src. Or + * NULL if no input nvlist was provided. Changes to this nvlist are + * ignored. If the input nvlist could not be deserialized, the + * ioctl will fail and the callback will not be called. + * + * nvlist_t *outnvl + * The output nvlist, initially empty. The callback can fill it in, + * and it will be returned to userland by serializing it into + * zfs_cmd_t:zc_nvlist_dst. If it is non-empty, and serialization + * fails (e.g. because the caller didn't supply a large enough + * buffer), then the overall ioctl will fail. See the + * 'smush_outnvlist' argument above for additional behaviors. + * + * There are two typical uses of the output nvlist: + * - To return state, e.g. property values. In this case, + * smush_outnvlist should be false. If the buffer was not large + * enough, the caller will reallocate a larger buffer and try + * the ioctl again. + * + * - To return multiple errors from an ioctl which makes on-disk + * changes. In this case, smush_outnvlist should be true. + * Ioctls which make on-disk modifications should generally not + * use the outnvl if they succeed, because the caller cannot + * distinguish between the operation failing, and + * deserialization failing. 
*/ #include @@ -91,8 +193,13 @@ list_t zfsdev_state_list; extern void zfs_init(void); extern void zfs_fini(void); -typedef int zfs_ioc_func_t(zfs_cmd_t *); -typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *); +uint_t zfs_fsyncer_key; +extern uint_t rrw_tsd_key; +static uint_t zfs_allow_log_key; + +typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *); +typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *); +typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *); typedef enum { NO_NAME, @@ -103,15 +210,18 @@ typedef enum { typedef enum { POOL_CHECK_NONE = 1 << 0, POOL_CHECK_SUSPENDED = 1 << 1, - POOL_CHECK_READONLY = 1 << 2 + POOL_CHECK_READONLY = 1 << 2, } zfs_ioc_poolcheck_t; typedef struct zfs_ioc_vec { + zfs_ioc_legacy_func_t *zvec_legacy_func; zfs_ioc_func_t *zvec_func; zfs_secpolicy_func_t *zvec_secpolicy; zfs_ioc_namecheck_t zvec_namecheck; - boolean_t zvec_his_log; + boolean_t zvec_allow_log; zfs_ioc_poolcheck_t zvec_pool_check; + boolean_t zvec_smush_outnvlist; + const char *zvec_name; } zfs_ioc_vec_t; /* This array is indexed by zfs_userquota_prop_t */ @@ -129,7 +239,8 @@ static int zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errors); static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *, boolean_t *); -int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **); +int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *); +static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp); static int zfs_prop_activate_feature(dsl_pool_t *dp, zfeature_info_t *feature); static int zfs_prop_activate_feature_check(void *arg1, void *arg2, @@ -237,7 +348,7 @@ zfs_log_history(zfs_cmd_t *zc) if (spa_open(zc->zc_name, &spa, FTAG) == 0) { if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY) - (void) spa_history_log(spa, buf, LOG_CMD_NORMAL); + (void) spa_history_log(spa, buf); spa_close(spa, FTAG); } history_str_free(buf); @@ -249,7 +360,7 @@ zfs_log_history(zfs_cmd_t *zc) */ /* ARGSUSED */ static int -zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (0); } @@ -260,7 +371,7 @@ zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { if (INGLOBALZONE(curproc) || zone_dataset_visible(zc->zc_name, NULL)) @@ -329,26 +440,13 @@ zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr) return (zfs_dozonecheck_impl(dataset, zoned, cr)); } -/* - * If name ends in a '@', then require recursive permissions. 
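For reference, the new-style plumbing above boils down to a handler with the zfs_ioc_func_t signature plus one zfs_ioctl_register() call. A minimal sketch, not part of this patch: ZFS_IOC_EXAMPLE and zfs_ioc_example are hypothetical names, and the registration arguments follow the zfs_ioctl_register() definition added further down in zfs_ioctl.c.

/*
 * Hypothetical nvlist-based ioctl: doubles "value" from the input
 * nvlist and returns it in the output nvlist.
 */
static int
zfs_ioc_example(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
{
	uint64_t value;

	/* innvl was already deserialized and validated by zfsdev_ioctl() */
	if (nvlist_lookup_uint64(innvl, "value", &value) != 0)
		return (EINVAL);

	/* whatever lands in outnvl is serialized back to the caller */
	fnvlist_add_uint64(outnvl, "doubled", value * 2);
	return (0);
}

/* in zfs_ioctl_init(): */
	zfs_ioctl_register("example", ZFS_IOC_EXAMPLE,
	    zfs_ioc_example, zfs_secpolicy_read, POOL_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE);

With allow_log set to B_TRUE, a successful call is recorded in "zpool history -i" together with its input and output nvlists, as described in the block comment above.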
- */ -int +static int zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) { int error; - boolean_t descendent = B_FALSE; dsl_dataset_t *ds; - char *at; - - at = strchr(name, '@'); - if (at != NULL && at[1] == '\0') { - *at = '\0'; - descendent = B_TRUE; - } error = dsl_dataset_hold(name, FTAG, &ds); - if (at != NULL) - *at = '@'; if (error != 0) return (error); @@ -356,14 +454,14 @@ zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) if (error == 0) { error = secpolicy_zfs(cr); if (error) - error = dsl_deleg_access_impl(ds, descendent, perm, cr); + error = dsl_deleg_access_impl(ds, perm, cr); } dsl_dataset_rele(ds, FTAG); return (error); } -int +static int zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, const char *perm, cred_t *cr) { @@ -373,7 +471,7 @@ zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, if (error == 0) { error = secpolicy_zfs(cr); if (error) - error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); + error = dsl_deleg_access_impl(ds, perm, cr); } return (error); } @@ -533,8 +631,9 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval, return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr)); } -int -zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int error; @@ -549,15 +648,17 @@ zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr) return (0); } -int -zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_ROLLBACK, cr)); } -int -zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { spa_t *spa; dsl_pool_t *dp; @@ -593,9 +694,18 @@ zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) return (error); } +/* ARGSUSED */ +static int +zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_SEND, cr)); +} + #ifdef HAVE_SMB_SHARE +/* ARGSUSED */ static int -zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { vnode_t *vp; int error; @@ -620,7 +730,7 @@ zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) #endif /* HAVE_SMB_SHARE */ int -zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { #ifdef HAVE_SMB_SHARE if (!INGLOBALZONE(curproc)) @@ -629,7 +739,7 @@ zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) if (secpolicy_nfs(cr) == 0) { return (0); } else { - return (zfs_secpolicy_deleg_share(zc, cr)); + return (zfs_secpolicy_deleg_share(zc, innvl, cr)); } #else return (ENOTSUP); @@ -637,7 +747,7 @@ zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) } int -zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { #ifdef HAVE_SMB_SHARE if (!INGLOBALZONE(curproc)) @@ -646,7 +756,7 @@ zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) if (secpolicy_smb(cr) == 0) { return (0); } else { - return (zfs_secpolicy_deleg_share(zc, cr)); + return (zfs_secpolicy_deleg_share(zc, innvl, cr)); } #else return (ENOTSUP); @@ -687,29 +797,55 @@ zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr)); } +/* ARGSUSED */ static int 
-zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_destroy_perms(zc->zc_name, cr)); } /* * Destroying snapshots with delegated permissions requires - * descendent mount and destroy permissions. + * descendant mount and destroy permissions. */ +/* ARGSUSED */ static int -zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - int error; - char *dsname; + nvlist_t *snaps; + nvpair_t *pair, *nextpair; + int error = 0; - dsname = kmem_asprintf("%s@", zc->zc_name); + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nextpair) { + dsl_dataset_t *ds; - error = zfs_secpolicy_destroy_perms(dsname, cr); - if (error == ENOENT) - error = zfs_secpolicy_destroy_perms(zc->zc_name, cr); + nextpair = nvlist_next_nvpair(snaps, pair); + error = dsl_dataset_hold(nvpair_name(pair), FTAG, &ds); + if (error == 0) { + dsl_dataset_rele(ds, FTAG); + } else if (error == ENOENT) { + /* + * Ignore any snapshots that don't exist (we consider + * them "already destroyed"). Remove the name from the + * nvl here in case the snapshot is created between + * now and when we try to destroy it (in which case + * we don't want to destroy it since we haven't + * checked for permission). + */ + fnvlist_remove_nvpair(snaps, pair); + error = 0; + continue; + } else { + break; + } + error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr); + if (error != 0) + break; + } - strfree(dsname); return (error); } @@ -742,14 +878,16 @@ zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) return (error); } +/* ARGSUSED */ static int -zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { char parentname[MAXNAMELEN]; objset_t *clone; @@ -789,8 +927,9 @@ zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr) return (error); } +/* ARGSUSED */ static int -zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int error; @@ -813,37 +952,72 @@ zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) ZFS_DELEG_PERM_SNAPSHOT, cr)); } +/* + * Check for permission to create each snapshot in the nvlist. + */ +/* ARGSUSED */ static int -zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { + nvlist_t *snaps; + int error = 0; + nvpair_t *pair; + + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + char *name = nvpair_name(pair); + char *atp = strchr(name, '@'); - return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr)); + if (atp == NULL) { + error = EINVAL; + break; + } + *atp = '\0'; + error = zfs_secpolicy_snapshot_perms(name, cr); + *atp = '@'; + if (error != 0) + break; + } + return (error); +} + +/* ARGSUSED */ +static int +zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + /* + * Even root must have a proper TSD so that we know what pool + * to log to. 
+ */ + if (tsd_get(zfs_allow_log_key) == NULL) + return (EPERM); + return (0); } static int -zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { char parentname[MAXNAMELEN]; int error; + char *origin; if ((error = zfs_get_parent(zc->zc_name, parentname, sizeof (parentname))) != 0) return (error); - if (zc->zc_value[0] != '\0') { - if ((error = zfs_secpolicy_write_perms(zc->zc_value, - ZFS_DELEG_PERM_CLONE, cr)) != 0) - return (error); - } + if (nvlist_lookup_string(innvl, "origin", &origin) == 0 && + (error = zfs_secpolicy_write_perms(origin, + ZFS_DELEG_PERM_CLONE, cr)) != 0) + return (error); if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_CREATE, cr)) != 0) return (error); - error = zfs_secpolicy_write_perms(parentname, - ZFS_DELEG_PERM_MOUNT, cr); - - return (error); + return (zfs_secpolicy_write_perms(parentname, + ZFS_DELEG_PERM_MOUNT, cr)); } /* @@ -852,7 +1026,7 @@ zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { if (secpolicy_sys_config(cr, B_FALSE) != 0) return (EPERM); @@ -865,7 +1039,7 @@ zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int error; @@ -881,13 +1055,14 @@ zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr) */ /* ARGSUSED */ static int -zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (secpolicy_zinject(cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { zfs_prop_t prop = zfs_name_to_prop(zc->zc_value); @@ -903,9 +1078,9 @@ zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr) } static int -zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - int err = zfs_secpolicy_read(zc, cr); + int err = zfs_secpolicy_read(zc, innvl, cr); if (err) return (err); @@ -932,9 +1107,9 @@ zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) } static int -zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - int err = zfs_secpolicy_read(zc, cr); + int err = zfs_secpolicy_read(zc, innvl, cr); if (err) return (err); @@ -945,22 +1120,25 @@ zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr) userquota_perms[zc->zc_objset_type], cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, NULL, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_HOLD, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_RELEASE, cr)); @@ -970,7 +1148,7 @@ zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr) * Policy for allowing temporary snapshots to be taken or released */ static int -zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, 
cred_t *cr) +zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { /* * A temporary snapshot is the same as a snapshot, @@ -983,13 +1161,13 @@ zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr) ZFS_DELEG_PERM_DIFF, cr)) == 0) return (0); - error = zfs_secpolicy_snapshot(zc, cr); + error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr); if (!error) - error = zfs_secpolicy_hold(zc, cr); + error = zfs_secpolicy_hold(zc, innvl, cr); if (!error) - error = zfs_secpolicy_release(zc, cr); + error = zfs_secpolicy_release(zc, innvl, cr); if (!error) - error = zfs_secpolicy_destroy(zc, cr); + error = zfs_secpolicy_destroy(zc, innvl, cr); return (error); } @@ -1028,36 +1206,40 @@ get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp) return (0); } +/* + * Reduce the size of this nvlist until it can be serialized in 'max' bytes. + * Entries will be removed from the end of the nvlist, and one int32 entry + * named "N_MORE_ERRORS" will be added indicating how many entries were + * removed. + */ static int -fit_error_list(zfs_cmd_t *zc, nvlist_t **errors) +nvlist_smush(nvlist_t *errors, size_t max) { size_t size; - VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); + size = fnvlist_size(errors); - if (size > zc->zc_nvlist_dst_size) { + if (size > max) { nvpair_t *more_errors; int n = 0; - if (zc->zc_nvlist_dst_size < 1024) + if (max < 1024) return (ENOMEM); - VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0); - more_errors = nvlist_prev_nvpair(*errors, NULL); + fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0); + more_errors = nvlist_prev_nvpair(errors, NULL); do { - nvpair_t *pair = nvlist_prev_nvpair(*errors, + nvpair_t *pair = nvlist_prev_nvpair(errors, more_errors); - VERIFY(nvlist_remove_nvpair(*errors, pair) == 0); + fnvlist_remove_nvpair(errors, pair); n++; - VERIFY(nvlist_size(*errors, &size, - NV_ENCODE_NATIVE) == 0); - } while (size > zc->zc_nvlist_dst_size); + size = fnvlist_size(errors); + } while (size > max); - VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0); - VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0); - ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); - ASSERT(size <= zc->zc_nvlist_dst_size); + fnvlist_remove_nvpair(errors, more_errors); + fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n); + ASSERT3U(fnvlist_size(errors), <=, max); } return (0); @@ -1070,21 +1252,20 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) int error = 0; size_t size; - VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0); + size = fnvlist_size(nvl); if (size > zc->zc_nvlist_dst_size) { error = ENOMEM; } else { - packed = kmem_alloc(size, KM_SLEEP | KM_NODEBUG); - VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE, - KM_SLEEP) == 0); + packed = fnvlist_pack(nvl, &size); if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst, size, zc->zc_iflags) != 0) error = EFAULT; - kmem_free(packed, size); + fnvlist_pack_free(packed, size); } zc->zc_nvlist_dst_size = size; + zc->zc_nvlist_dst_filled = B_TRUE; return (error); } @@ -1163,7 +1344,6 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) nvlist_t *config, *props = NULL; nvlist_t *rootprops = NULL; nvlist_t *zplprops = NULL; - char *buf; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config))) @@ -1203,9 +1383,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) goto pool_props_bad; } - buf = history_str_get(zc); - - error = spa_create(zc->zc_name, config, props, buf, zplprops); + error = spa_create(zc->zc_name, config, props, zplprops); /* * Set the 
remaining root properties @@ -1214,9 +1392,6 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) (void) spa_destroy(zc->zc_name); - if (buf != NULL) - history_str_free(buf); - pool_props_bad: nvlist_free(rootprops); nvlist_free(zplprops); @@ -2249,32 +2424,26 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, /* * This function is best effort. If it fails to set any of the given properties, - * it continues to set as many as it can and returns the first error - * encountered. If the caller provides a non-NULL errlist, it also gives the - * complete list of names of all the properties it failed to set along with the - * corresponding error numbers. The caller is responsible for freeing the - * returned errlist. + * it continues to set as many as it can and returns the last error + * encountered. If the caller provides a non-NULL errlist, it will be filled in + * with the list of names of all the properties that failed along with the + * corresponding error numbers. * - * If every property is set successfully, zero is returned and the list pointed - * at by errlist is NULL. + * If every property is set successfully, zero is returned and errlist is not + * modified. */ int zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, - nvlist_t **errlist) + nvlist_t *errlist) { nvpair_t *pair; nvpair_t *propval; int rv = 0; uint64_t intval; char *strval; - nvlist_t *genericnvl; - nvlist_t *errors; - nvlist_t *retrynvl; - - VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + nvlist_t *genericnvl = fnvlist_alloc(); + nvlist_t *retrynvl = fnvlist_alloc(); retry: pair = NULL; while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { @@ -2286,7 +2455,7 @@ zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); + attrs = fnvpair_value_nvlist(pair); if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &propval) != 0) err = EINVAL; @@ -2311,8 +2480,7 @@ zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, } else if (nvpair_type(propval) == DATA_TYPE_UINT64) { const char *unused; - VERIFY(nvpair_value_uint64(propval, - &intval) == 0); + intval = fnvpair_value_uint64(propval); switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: @@ -2356,8 +2524,11 @@ zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, } } - if (err != 0) - VERIFY(nvlist_add_int32(errors, propname, err) == 0); + if (err != 0) { + if (errlist != NULL) + fnvlist_add_int32(errlist, propname, err); + rv = err; + } } if (nvl != retrynvl && !nvlist_empty(retrynvl)) { @@ -2379,44 +2550,33 @@ zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); - VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, - &propval) == 0); + attrs = fnvpair_value_nvlist(pair); + propval = fnvlist_lookup_nvpair(attrs, + ZPROP_VALUE); } if (nvpair_type(propval) == DATA_TYPE_STRING) { - VERIFY(nvpair_value_string(propval, - &strval) == 0); + strval = fnvpair_value_string(propval); err = dsl_prop_set(dsname, propname, source, 1, strlen(strval) + 1, strval); } else { - VERIFY(nvpair_value_uint64(propval, - &intval) == 0); + intval 
= fnvpair_value_uint64(propval); err = dsl_prop_set(dsname, propname, source, 8, 1, &intval); } if (err != 0) { - VERIFY(nvlist_add_int32(errors, propname, - err) == 0); + if (errlist != NULL) { + fnvlist_add_int32(errlist, propname, + err); + } + rv = err; } } } nvlist_free(genericnvl); nvlist_free(retrynvl); - if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { - nvlist_free(errors); - errors = NULL; - } else { - VERIFY(nvpair_value_int32(pair, &rv) == 0); - } - - if (errlist == NULL) - nvlist_free(errors); - else - *errlist = errors; - return (rv); } @@ -2424,7 +2584,7 @@ zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, * Check that all the properties are valid user properties. */ static int -zfs_check_userprops(char *fsname, nvlist_t *nvl) +zfs_check_userprops(const char *fsname, nvlist_t *nvl) { nvpair_t *pair = NULL; int error = 0; @@ -2504,7 +2664,7 @@ zfs_ioc_set_prop(zfs_cmd_t *zc) boolean_t received = zc->zc_cookie; zprop_source_t source = (received ? ZPROP_SRC_RECEIVED : ZPROP_SRC_LOCAL); - nvlist_t *errors = NULL; + nvlist_t *errors; int error; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, @@ -2527,7 +2687,8 @@ zfs_ioc_set_prop(zfs_cmd_t *zc) } } - error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors); + errors = fnvlist_alloc(); + error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors); if (zc->zc_nvlist_dst != 0 && errors != NULL) { (void) put_nvlist(zc, errors); @@ -2609,7 +2770,7 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc) return (EINVAL); } - /* the property name has been validated by zfs_secpolicy_inherit() */ + /* property name has been validated by zfs_secpolicy_inherit_prop() */ return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL)); } @@ -2955,26 +3116,30 @@ zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops, } /* - * inputs: - * zc_objset_type type of objset to create (fs vs zvol) - * zc_name name of new objset - * zc_value name of snapshot to clone from (may be empty) - * zc_nvlist_src{_size} nvlist of properties to apply + * innvl: { + * "type" -> dmu_objset_type_t (int32) + * (optional) "props" -> { prop -> value } + * } * - * outputs: none + * outnvl: propname -> error code (int32) */ static int -zfs_ioc_create(zfs_cmd_t *zc) +zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { - objset_t *clone; int error = 0; - zfs_creat_t zct; + zfs_creat_t zct = { 0 }; nvlist_t *nvprops = NULL; void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); - dmu_objset_type_t type = zc->zc_objset_type; + int32_t type32; + dmu_objset_type_t type; + boolean_t is_insensitive = B_FALSE; - switch (type) { + if (nvlist_lookup_int32(innvl, "type", &type32) != 0) + return (EINVAL); + type = type32; + (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); + switch (type) { case DMU_OST_ZFS: cbfunc = zfs_create_cb; break; @@ -2987,154 +3152,221 @@ zfs_ioc_create(zfs_cmd_t *zc) cbfunc = NULL; break; } - if (strchr(zc->zc_name, '@') || - strchr(zc->zc_name, '%')) + if (strchr(fsname, '@') || + strchr(fsname, '%')) return (EINVAL); - if (zc->zc_nvlist_src != 0 && - (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &nvprops)) != 0) - return (error); - - zct.zct_zplprops = NULL; zct.zct_props = nvprops; - if (zc->zc_value[0] != '\0') { - /* - * We're creating a clone of an existing snapshot. 
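Under the reworked convention just above, the caller owns the error list: it passes in an allocated nvlist, zfs_set_prop_nvlist() adds a propname -> errno entry for each property it could not set, and the return value is the last error encountered (0 if everything succeeded). A kernel-side sketch of a caller, with the dataset name and property nvlist assumed to come from elsewhere:

static int
apply_props(const char *dsname, nvlist_t *props)
{
	nvlist_t *errors = fnvlist_alloc();
	nvpair_t *pair = NULL;
	int err;

	/* each failed property is recorded in 'errors' as propname -> errno */
	err = zfs_set_prop_nvlist(dsname, ZPROP_SRC_LOCAL, props, errors);

	while ((pair = nvlist_next_nvpair(errors, pair)) != NULL) {
		cmn_err(CE_NOTE, "!property '%s' failed: %d",
		    nvpair_name(pair), fnvpair_value_int32(pair));
	}
	nvlist_free(errors);
	return (err);	/* last error encountered, or 0 */
}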
- */ - zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; - if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) { - nvlist_free(nvprops); - return (EINVAL); - } - - error = dmu_objset_hold(zc->zc_value, FTAG, &clone); - if (error) { - nvlist_free(nvprops); - return (error); - } + if (cbfunc == NULL) + return (EINVAL); - error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0); - dmu_objset_rele(clone, FTAG); - if (error) { - nvlist_free(nvprops); - return (error); - } - } else { - boolean_t is_insensitive = B_FALSE; + if (type == DMU_OST_ZVOL) { + uint64_t volsize, volblocksize; - if (cbfunc == NULL) { - nvlist_free(nvprops); + if (nvprops == NULL) + return (EINVAL); + if (nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0) return (EINVAL); - } - - if (type == DMU_OST_ZVOL) { - uint64_t volsize, volblocksize; - - if (nvprops == NULL || - nvlist_lookup_uint64(nvprops, - zfs_prop_to_name(ZFS_PROP_VOLSIZE), - &volsize) != 0) { - nvlist_free(nvprops); - return (EINVAL); - } - if ((error = nvlist_lookup_uint64(nvprops, - zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), - &volblocksize)) != 0 && error != ENOENT) { - nvlist_free(nvprops); - return (EINVAL); - } + if ((error = nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), + &volblocksize)) != 0 && error != ENOENT) + return (EINVAL); - if (error != 0) - volblocksize = zfs_prop_default_numeric( - ZFS_PROP_VOLBLOCKSIZE); + if (error != 0) + volblocksize = zfs_prop_default_numeric( + ZFS_PROP_VOLBLOCKSIZE); - if ((error = zvol_check_volblocksize( - volblocksize)) != 0 || - (error = zvol_check_volsize(volsize, - volblocksize)) != 0) { - nvlist_free(nvprops); - return (error); - } - } else if (type == DMU_OST_ZFS) { - int error; + if ((error = zvol_check_volblocksize( + volblocksize)) != 0 || + (error = zvol_check_volsize(volsize, + volblocksize)) != 0) + return (error); + } else if (type == DMU_OST_ZFS) { + int error; - /* - * We have to have normalization and - * case-folding flags correct when we do the - * file system creation, so go figure them out - * now. - */ - VERIFY(nvlist_alloc(&zct.zct_zplprops, - NV_UNIQUE_NAME, KM_SLEEP) == 0); - error = zfs_fill_zplprops(zc->zc_name, nvprops, - zct.zct_zplprops, &is_insensitive); - if (error != 0) { - nvlist_free(nvprops); - nvlist_free(zct.zct_zplprops); - return (error); - } + /* + * We have to have normalization and + * case-folding flags correct when we do the + * file system creation, so go figure them out + * now. + */ + VERIFY(nvlist_alloc(&zct.zct_zplprops, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + error = zfs_fill_zplprops(fsname, nvprops, + zct.zct_zplprops, &is_insensitive); + if (error != 0) { + nvlist_free(zct.zct_zplprops); + return (error); } - error = dmu_objset_create(zc->zc_name, type, - is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); - nvlist_free(zct.zct_zplprops); } + error = dmu_objset_create(fsname, type, + is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); + nvlist_free(zct.zct_zplprops); + /* * It would be nice to do this atomically. 
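Concretely, the innvl this handler now expects replaces the old zc_objset_type/zc_value/zc_nvlist_src triple. A sketch of the shape for a zvol create, using only the keys named in the spec above ("type" and "props"); the clone handler that follows takes an "origin" string instead of "type". Illustrative only, not code from this patch.

static nvlist_t *
zvol_create_args(void)
{
	nvlist_t *props = fnvlist_alloc();
	nvlist_t *innvl = fnvlist_alloc();

	/* a zvol must carry a volsize property, as checked above */
	fnvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLSIZE),
	    1ULL << 30);

	fnvlist_add_int32(innvl, "type", DMU_OST_ZVOL);
	fnvlist_add_nvlist(innvl, "props", props);
	fnvlist_free(props);
	return (innvl);
}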
*/ if (error == 0) { - error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL, - nvprops, NULL); + error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, + nvprops, outnvl); if (error != 0) - (void) dmu_objset_destroy(zc->zc_name, B_FALSE); + (void) dmu_objset_destroy(fsname, B_FALSE); } - nvlist_free(nvprops); return (error); } /* - * inputs: - * zc_name name of filesystem - * zc_value short name of snapshot - * zc_cookie recursive flag - * zc_nvlist_src[_size] property list + * innvl: { + * "origin" -> name of origin snapshot + * (optional) "props" -> { prop -> value } + * } * * outputs: - * zc_value short snapname (i.e. part after the '@') + * outnvl: propname -> error code (int32) */ static int -zfs_ioc_snapshot(zfs_cmd_t *zc) +zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { + int error = 0; nvlist_t *nvprops = NULL; - int error; - boolean_t recursive = zc->zc_cookie; + char *origin_name; + dsl_dataset_t *origin; - if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) + if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0) return (EINVAL); + (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); - if (zc->zc_nvlist_src != 0 && - (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &nvprops)) != 0) + if (strchr(fsname, '@') || + strchr(fsname, '%')) + return (EINVAL); + + if (dataset_namecheck(origin_name, NULL, NULL) != 0) + return (EINVAL); + + error = dsl_dataset_hold(origin_name, FTAG, &origin); + if (error) return (error); - error = zfs_check_userprops(zc->zc_name, nvprops); + error = dmu_objset_clone(fsname, origin, 0); + dsl_dataset_rele(origin, FTAG); if (error) - goto out; + return (error); - if (!nvlist_empty(nvprops) && - zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) { - error = ENOTSUP; - goto out; + /* + * It would be nice to do this atomically. + */ + if (error == 0) { + error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, + nvprops, outnvl); + if (error != 0) + (void) dmu_objset_destroy(fsname, B_FALSE); } + return (error); +} - error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL, - nvprops, recursive, B_FALSE, -1); +/* + * innvl: { + * "snaps" -> { snapshot1, snapshot2 } + * (optional) "props" -> { prop -> value (string) } + * } + * + * outnvl: snapshot -> error code (int32) + * + */ +static int +zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) +{ + nvlist_t *snaps; + nvlist_t *props = NULL; + int error, poollen; + nvpair_t *pair, *pair2; -out: - nvlist_free(nvprops); + (void) nvlist_lookup_nvlist(innvl, "props", &props); + if ((error = zfs_check_userprops(poolname, props)) != 0) + return (error); + + if (!nvlist_empty(props) && + zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS)) + return (ENOTSUP); + + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + poollen = strlen(poolname); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + const char *name = nvpair_name(pair); + const char *cp = strchr(name, '@'); + + /* + * The snap name must contain an @, and the part after it must + * contain only valid characters. + */ + if (cp == NULL || snapshot_namecheck(cp + 1, NULL, NULL) != 0) + return (EINVAL); + + /* + * The snap must be in the specified pool. + */ + if (strncmp(name, poolname, poollen) != 0 || + (name[poollen] != '/' && name[poollen] != '@')) + return (EXDEV); + + /* This must be the only snap of this fs. 
*/ + for (pair2 = nvlist_next_nvpair(snaps, pair); + pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) { + if (strncmp(name, nvpair_name(pair2), cp - name + 1) + == 0) { + return (EXDEV); + } + } + } + + error = dmu_objset_snapshot(snaps, props, outnvl); + return (error); +} + +/* + * innvl: "message" -> string + */ +/* ARGSUSED */ +static int +zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) +{ + char *message; + spa_t *spa; + int error; + char *poolname; + + /* + * The poolname in the ioctl is not set, we get it from the TSD, + * which was set at the end of the last successful ioctl that allows + * logging. The secpolicy func already checked that it is set. + * Only one log ioctl is allowed after each successful ioctl, so + * we clear the TSD here. + */ + poolname = tsd_get(zfs_allow_log_key); + (void) tsd_set(zfs_allow_log_key, NULL); + error = spa_open(poolname, &spa, FTAG); + strfree(poolname); + if (error != 0) + return (error); + + if (nvlist_lookup_string(innvl, "message", &message) != 0) { + spa_close(spa, FTAG); + return (EINVAL); + } + + if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) { + spa_close(spa, FTAG); + return (ENOTSUP); + } + + error = spa_history_log(spa, message); + spa_close(spa, FTAG); return (error); } @@ -3143,6 +3375,7 @@ zfs_ioc_snapshot(zfs_cmd_t *zc) * name dataset name, or when 'arg == NULL' the full snapshot name * arg short snapshot name (i.e. part after the '@') */ +/* ARGSUSED */ int zfs_unmount_snap(const char *name, void *arg) { @@ -3153,22 +3386,13 @@ zfs_unmount_snap(const char *name, void *arg) char *ptr; int error; - if (arg) { - dsname = strdup(name); - snapname = strdup(arg); - } else { - ptr = strchr(name, '@'); - if (ptr) { - dsname = strdup(name); - dsname[ptr - name] = '\0'; - snapname = strdup(ptr + 1); - } else { - return (0); - } - } + if ((ptr = strchr(name, '@')) == NULL) + return (0); + dsname = strdup(name); + dsname[ptr - name] = '\0'; + snapname = strdup(ptr + 1); fullname = kmem_asprintf("%s@%s", dsname, snapname); - error = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE); if (error == 0) { error = zfsctl_unmount_snapshot(zsb, fullname, MNT_FORCE); @@ -3187,47 +3411,46 @@ zfs_unmount_snap(const char *name, void *arg) } /* - * inputs: - * zc_name name of filesystem, snaps must be under it - * zc_nvlist_src[_size] full names of snapshots to destroy - * zc_defer_destroy mark for deferred destroy + * innvl: { + * "snaps" -> { snapshot1, snapshot2 } + * (optional boolean) "defer" + * } * - * outputs: - * zc_name on failure, name of failed snapshot + * outnvl: snapshot -> error code (int32) */ static int -zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc) +zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { - int err, len; - nvlist_t *nvl; + int poollen; + nvlist_t *snaps; nvpair_t *pair; + boolean_t defer; - if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &nvl)) != 0) - return (err); + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + defer = nvlist_exists(innvl, "defer"); - len = strlen(zc->zc_name); - for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; - pair = nvlist_next_nvpair(nvl, pair)) { + poollen = strlen(poolname); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { const char *name = nvpair_name(pair); + /* - * The snap name must be underneath the zc_name. This ensures - * that our permission checks were legitimate. + * The snap must be in the specified pool. 
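From userland, the new multi-snapshot ioctl is reached through the libzfs_core library this patch adds: the caller names every snapshot in one nvlist and all of them are created together. A sketch under the assumption that lzc_snapshot() and libzfs_core_init()/libzfs_core_fini() have the signatures used here (they are not spelled out in this hunk):

#include <libnvpair.h>
#include <libzfs_core.h>

/* Create two snapshots, in the same pool, in one request. */
static int
snap_pair(void)
{
	nvlist_t *snaps = fnvlist_alloc();
	nvlist_t *errlist = NULL;
	int err;

	fnvlist_add_boolean(snaps, "tank/fs1@nightly");
	fnvlist_add_boolean(snaps, "tank/fs2@nightly");

	if ((err = libzfs_core_init()) != 0)
		goto out;
	/* on failure, errlist (if set) maps snapshot name -> errno */
	err = lzc_snapshot(snaps, NULL, &errlist);
	libzfs_core_fini();
out:
	nvlist_free(errlist);
	fnvlist_free(snaps);
	return (err);
}

Only the pair names matter; the boolean values are placeholders, which is why the secpolicy and handler loops above look exclusively at nvpair_name().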
*/ - if (strncmp(zc->zc_name, name, len) != 0 || - (name[len] != '@' && name[len] != '/')) { - nvlist_free(nvl); - return (EINVAL); - } + if (strncmp(name, poolname, poollen) != 0 || + (name[poollen] != '/' && name[poollen] != '@')) + return (EXDEV); + /* + * Ignore failures to unmount; dmu_snapshots_destroy_nvl() + * will deal with this gracefully (by filling in outnvl). + */ (void) zfs_unmount_snap(name, NULL); (void) zvol_remove_minor(name); } - err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy, - zc->zc_name); - nvlist_free(nvl); - return (err); + return (dmu_snapshots_destroy_nvl(snaps, defer, outnvl)); } /* @@ -3605,7 +3828,7 @@ zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist) (void) strcpy(zc->zc_value, nvpair_name(pair)); if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 || - (err = zfs_secpolicy_inherit(zc, CRED())) != 0) { + (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) { VERIFY(nvlist_remove_nvpair(props, pair) == 0); VERIFY(nvlist_add_int32(errors, zc->zc_value, err) == 0); @@ -3813,8 +4036,6 @@ zfs_ioc_recv(zfs_cmd_t *zc) * dmu_recv_begin() succeeds. */ if (props) { - nvlist_t *errlist; - if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) { if (drc.drc_newfs) { if (spa_version(os->os_spa) >= @@ -3833,12 +4054,12 @@ zfs_ioc_recv(zfs_cmd_t *zc) } (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, - props, &errlist); - (void) nvlist_merge(errors, errlist, 0); - nvlist_free(errlist); + props, errors); } - if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) { + if (zc->zc_nvlist_dst_size != 0 && + (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 || + put_nvlist(zc, errors) != 0)) { /* * Caller made zc->zc_nvlist_dst less than the minimum expected * size or supplied an invalid address. 
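When zc_nvlist_dst is too small, the receive path above now relies on nvlist_smush() instead of the old fit_error_list(), so a truncated error list comes back with a ZPROP_N_MORE_ERRORS entry rather than silently losing entries. A consumer-side sketch of how such a list can be reported; the helper name is hypothetical.

#include <stdio.h>
#include <string.h>
#include <libnvpair.h>
#include <sys/fs/zfs.h>	/* ZPROP_N_MORE_ERRORS */

/* Print each propname -> errno entry, noting how many were smushed away. */
static void
report_errlist(nvlist_t *errlist)
{
	nvpair_t *pair;
	int32_t dropped = 0;

	for (pair = nvlist_next_nvpair(errlist, NULL); pair != NULL;
	    pair = nvlist_next_nvpair(errlist, pair)) {
		if (strcmp(nvpair_name(pair), ZPROP_N_MORE_ERRORS) == 0) {
			dropped = fnvpair_value_int32(pair);
			continue;
		}
		(void) fprintf(stderr, "%s: error %d\n",
		    nvpair_name(pair), fnvpair_value_int32(pair));
	}
	if (dropped != 0)
		(void) fprintf(stderr, "... and %d more error(s)\n", dropped);
}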
@@ -3970,15 +4191,13 @@ zfs_ioc_send(zfs_cmd_t *zc) rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); - if (error) { - spa_close(spa, FTAG); + spa_close(spa, FTAG); + if (error) return (error); - } error = dmu_objset_from_ds(ds, &tosnap); if (error) { dsl_dataset_rele(ds, FTAG); - spa_close(spa, FTAG); return (error); } @@ -3986,7 +4205,6 @@ zfs_ioc_send(zfs_cmd_t *zc) rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom); rw_exit(&dp->dp_config_rwlock); - spa_close(spa, FTAG); if (error) { dsl_dataset_rele(ds, FTAG); return (error); @@ -3997,12 +4215,37 @@ zfs_ioc_send(zfs_cmd_t *zc) dsl_dataset_rele(ds, FTAG); return (error); } - } else { - spa_close(spa, FTAG); + } + + if (zc->zc_obj) { + dsl_pool_t *dp = ds->ds_dir->dd_pool; + + if (fromsnap != NULL) { + dsl_dataset_rele(dsfrom, FTAG); + dsl_dataset_rele(ds, FTAG); + return (EINVAL); + } + + if (dsl_dir_is_clone(ds->ds_dir)) { + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &dsfrom); + rw_exit(&dp->dp_config_rwlock); + if (error) { + dsl_dataset_rele(ds, FTAG); + return (error); + } + error = dmu_objset_from_ds(dsfrom, &fromsnap); + if (error) { + dsl_dataset_rele(dsfrom, FTAG); + dsl_dataset_rele(ds, FTAG); + return (error); + } + } } if (estimate) { - error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj, + error = dmu_send_estimate(tosnap, fromsnap, &zc->zc_objset_type); } else { file_t *fp = getf(zc->zc_cookie); @@ -4014,7 +4257,7 @@ zfs_ioc_send(zfs_cmd_t *zc) } off = fp->f_offset; - error = dmu_send(tosnap, fromsnap, zc->zc_obj, + error = dmu_send(tosnap, fromsnap, zc->zc_cookie, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) @@ -4413,6 +4656,7 @@ zfs_ioc_next_obj(zfs_cmd_t *zc) * zc_cleanup_fd cleanup-on-exit file descriptor for calling process * * outputs: + * zc_value short name of new snapshot */ static int zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) @@ -4420,22 +4664,21 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) char *snap_name; int error; - snap_name = kmem_asprintf("%s-%016llx", zc->zc_value, + snap_name = kmem_asprintf("%s@%s-%016llx", zc->zc_name, zc->zc_value, (u_longlong_t)ddi_get_lbolt64()); - if (strlen(snap_name) >= MAXNAMELEN) { + if (strlen(snap_name) >= MAXPATHLEN) { strfree(snap_name); return (E2BIG); } - error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name, - NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd); + error = dmu_objset_snapshot_tmp(snap_name, "%temp", zc->zc_cleanup_fd); if (error != 0) { strfree(snap_name); return (error); } - (void) strcpy(zc->zc_value, snap_name); + (void) strcpy(zc->zc_value, strchr(snap_name, '@') + 1); strfree(snap_name); return (0); } @@ -4858,178 +5101,440 @@ zfs_ioc_space_written(zfs_cmd_t *zc) } /* - * inputs: - * zc_name full name of last snapshot - * zc_value full name of first snapshot + * innvl: { + * "firstsnap" -> snapshot name + * } * - * outputs: - * zc_cookie space in bytes - * zc_objset_type compressed space in bytes - * zc_perm_action uncompressed space in bytes + * outnvl: { + * "used" -> space in bytes + * "compressed" -> compressed space in bytes + * "uncompressed" -> uncompressed space in bytes + * } */ static int -zfs_ioc_space_snaps(zfs_cmd_t *zc) +zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) { int error; dsl_dataset_t *new, *old; + char *firstsnap; + uint64_t used, comp, uncomp; - error = 
dsl_dataset_hold(zc->zc_name, FTAG, &new); + if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0) + return (EINVAL); + + error = dsl_dataset_hold(lastsnap, FTAG, &new); if (error != 0) return (error); - error = dsl_dataset_hold(zc->zc_value, FTAG, &old); + error = dsl_dataset_hold(firstsnap, FTAG, &old); if (error != 0) { dsl_dataset_rele(new, FTAG); return (error); } - error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie, - &zc->zc_objset_type, &zc->zc_perm_action); + error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp); dsl_dataset_rele(old, FTAG); dsl_dataset_rele(new, FTAG); + fnvlist_add_uint64(outnvl, "used", used); + fnvlist_add_uint64(outnvl, "compressed", comp); + fnvlist_add_uint64(outnvl, "uncompressed", uncomp); return (error); } /* - * pool create, destroy, and export don't log the history as part of - * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export - * do the logging of those commands. + * innvl: { + * "fd" -> file descriptor to write stream to (int32) + * (optional) "fromsnap" -> full snap name to send an incremental from + * } + * + * outnvl is unused */ -static zfs_ioc_vec_t zfs_ioc_vec[] = { - { zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_destroy, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_configs, zfs_secpolicy_none, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_READONLY }, - { zfs_ioc_pool_upgrade, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_set_state, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_setpath, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_setfru, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_objset_stats, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_create_minor, 
zfs_secpolicy_config, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_remove_minor, zfs_secpolicy_config, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_NONE }, - { zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive, - DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_pool_set_props, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME, - B_FALSE, POOL_CHECK_NONE }, - { zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME, - B_FALSE, POOL_CHECK_NONE }, - { zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, - DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_tmp_snapshot, 
zfs_secpolicy_tmp_snapshot, DATASET_NAME, - B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_events_next, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_events_clear, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE } -}; +/* ARGSUSED */ +static int +zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) +{ + objset_t *fromsnap = NULL; + objset_t *tosnap; + int error; + offset_t off; + char *fromname; + int fd; + + error = nvlist_lookup_int32(innvl, "fd", &fd); + if (error != 0) + return (EINVAL); + + error = dmu_objset_hold(snapname, FTAG, &tosnap); + if (error) + return (error); + + error = nvlist_lookup_string(innvl, "fromsnap", &fromname); + if (error == 0) { + error = dmu_objset_hold(fromname, FTAG, &fromsnap); + if (error) { + dmu_objset_rele(tosnap, FTAG); + return (error); + } + } + + { + file_t *fp = getf(fd); + if (fp == NULL) { + dmu_objset_rele(tosnap, FTAG); + if (fromsnap != NULL) + dmu_objset_rele(fromsnap, FTAG); + return (EBADF); + } + + off = fp->f_offset; + error = dmu_send(tosnap, fromsnap, fd, fp->f_vnode, &off); + + if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) + fp->f_offset = off; + } + releasef(fd); + if (fromsnap != NULL) + dmu_objset_rele(fromsnap, FTAG); + dmu_objset_rele(tosnap, FTAG); + return (error); +} + +/* + * Determine approximately how large a zfs send stream will be -- the number + * of bytes that will be written to the fd supplied to zfs_ioc_send_new(). 
+ * + * innvl: { + * (optional) "fromsnap" -> full snap name to send an incremental from + * } + * + * outnvl: { + * "space" -> bytes of space (uint64) + * } + */ +static int +zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) +{ + objset_t *fromsnap = NULL; + objset_t *tosnap; + int error; + char *fromname; + uint64_t space; + + error = dmu_objset_hold(snapname, FTAG, &tosnap); + if (error) + return (error); + + error = nvlist_lookup_string(innvl, "fromsnap", &fromname); + if (error == 0) { + error = dmu_objset_hold(fromname, FTAG, &fromsnap); + if (error) { + dmu_objset_rele(tosnap, FTAG); + return (error); + } + } + + error = dmu_send_estimate(tosnap, fromsnap, &space); + fnvlist_add_uint64(outnvl, "space", space); + + if (fromsnap != NULL) + dmu_objset_rele(fromsnap, FTAG); + dmu_objset_rele(tosnap, FTAG); + return (error); +} + + +static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST]; + +static void +zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck, + boolean_t log_history, zfs_ioc_poolcheck_t pool_check) +{ + zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST]; + + ASSERT3U(ioc, >=, ZFS_IOC_FIRST); + ASSERT3U(ioc, <, ZFS_IOC_LAST); + ASSERT3P(vec->zvec_legacy_func, ==, NULL); + ASSERT3P(vec->zvec_func, ==, NULL); + + vec->zvec_legacy_func = func; + vec->zvec_secpolicy = secpolicy; + vec->zvec_namecheck = namecheck; + vec->zvec_allow_log = log_history; + vec->zvec_pool_check = pool_check; +} + +/* + * See the block comment at the beginning of this file for details on + * each argument to this function. + */ +static void +zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func, + zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck, + zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist, + boolean_t allow_log) +{ + zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST]; + + ASSERT3U(ioc, >=, ZFS_IOC_FIRST); + ASSERT3U(ioc, <, ZFS_IOC_LAST); + ASSERT3P(vec->zvec_legacy_func, ==, NULL); + ASSERT3P(vec->zvec_func, ==, NULL); + + /* if we are logging, the name must be valid */ + ASSERT(!allow_log || namecheck != NO_NAME); + + vec->zvec_name = name; + vec->zvec_func = func; + vec->zvec_secpolicy = secpolicy; + vec->zvec_namecheck = namecheck; + vec->zvec_pool_check = pool_check; + vec->zvec_smush_outnvlist = smush_outnvlist; + vec->zvec_allow_log = allow_log; +} + +static void +zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy, boolean_t log_history, + zfs_ioc_poolcheck_t pool_check) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + POOL_NAME, log_history, pool_check); +} + +static void +zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + DATASET_NAME, B_FALSE, pool_check); +} + +static void +zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func) +{ + zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config, + POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); +} + +static void +zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + NO_NAME, B_FALSE, POOL_CHECK_NONE); +} + +static void +zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc, + zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t 
*secpolicy) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED); +} + +static void +zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func) +{ + zfs_ioctl_register_dataset_read_secpolicy(ioc, func, + zfs_secpolicy_read); +} + +static void +zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); +} + +static void +zfs_ioctl_init(void) +{ + zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT, + zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY, + zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE); + + zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS, + zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + + zfs_ioctl_register("send", ZFS_IOC_SEND_NEW, + zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + + zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE, + zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + + zfs_ioctl_register("create", ZFS_IOC_CREATE, + zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("clone", ZFS_IOC_CLONE, + zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS, + zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + /* IOCTLS that use the legacy function signature */ + + zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, + zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY); + + zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN, + zfs_ioc_pool_scan); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE, + zfs_ioc_pool_upgrade); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD, + zfs_ioc_vdev_add); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE, + zfs_ioc_vdev_remove); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE, + zfs_ioc_vdev_set_state); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH, + zfs_ioc_vdev_attach); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH, + zfs_ioc_vdev_detach); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH, + zfs_ioc_vdev_setpath); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU, + zfs_ioc_vdev_setfru); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS, + zfs_ioc_pool_set_props); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT, + zfs_ioc_vdev_split); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID, + zfs_ioc_pool_reguid); + + zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS, + zfs_ioc_pool_configs, zfs_secpolicy_none); + zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT, + zfs_ioc_pool_tryimport, zfs_secpolicy_config); + zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT, + zfs_ioc_inject_fault, zfs_secpolicy_inject); + zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT, + 
zfs_ioc_clear_fault, zfs_secpolicy_inject); + zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT, + zfs_ioc_inject_list_next, zfs_secpolicy_inject); + + /* + * pool destroy, and export don't log the history as part of + * zfsdev_ioctl, but rather zfs_ioc_pool_export + * does the logging of those commands. + */ + zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy, + zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export, + zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE); + + zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats, + zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props, + zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); + + zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log, + zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED); + zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME, + zfs_ioc_dsobj_to_dsname, + zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED); + zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY, + zfs_ioc_pool_get_history, + zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED); + + zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE); + + zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED); + zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED); + + zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN, + zfs_ioc_space_written); + zfs_ioctl_register_dataset_read(ZFS_IOC_GET_HOLDS, + zfs_ioc_get_holds); + zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS, + zfs_ioc_objset_recvd_props); + zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ, + zfs_ioc_next_obj); + zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL, + zfs_ioc_get_fsacl); + zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS, + zfs_ioc_objset_stats); + zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS, + zfs_ioc_objset_zplprops); + zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT, + zfs_ioc_dataset_list_next); + zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT, + zfs_ioc_snapshot_list_next); + zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS, + zfs_ioc_send_progress); + + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF, + zfs_ioc_diff, zfs_secpolicy_diff); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS, + zfs_ioc_obj_to_stats, zfs_secpolicy_diff); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH, + zfs_ioc_obj_to_path, zfs_secpolicy_diff); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE, + zfs_ioc_userspace_one, zfs_secpolicy_userspace_one); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY, + zfs_ioc_userspace_many, zfs_secpolicy_userspace_many); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND, + zfs_ioc_send, zfs_secpolicy_send); + + zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop, + zfs_secpolicy_none); + zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy, + zfs_secpolicy_destroy); + zfs_ioctl_register_dataset_modify(ZFS_IOC_ROLLBACK, zfs_ioc_rollback, + zfs_secpolicy_rollback); + zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename, + zfs_secpolicy_rename); + zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv, + zfs_secpolicy_recv); + 
zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote, + zfs_secpolicy_promote); + zfs_ioctl_register_dataset_modify(ZFS_IOC_HOLD, zfs_ioc_hold, + zfs_secpolicy_hold); + zfs_ioctl_register_dataset_modify(ZFS_IOC_RELEASE, zfs_ioc_release, + zfs_secpolicy_release); + zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP, + zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop); + zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl, + zfs_secpolicy_set_fsacl); + + zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share, + zfs_secpolicy_share, POOL_CHECK_NONE); + zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl, + zfs_secpolicy_smb_acl, POOL_CHECK_NONE); + zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE, + zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); + zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT, + zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); + + /* + * ZoL functions + */ + zfs_ioctl_register_legacy(ZFS_IOC_CREATE_MINOR, zfs_ioc_create_minor, + zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_legacy(ZFS_IOC_REMOVE_MINOR, zfs_ioc_remove_minor, + zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next, + zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear, + zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE); +} int pool_status_check(const char *name, zfs_ioc_namecheck_t type, @@ -5192,60 +5697,135 @@ static long zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) { zfs_cmd_t *zc; - uint_t vec; - int error, rc, flag = 0; - - vec = cmd - ZFS_IOC; - if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) + uint_t vecnum; + int error, rc, len, flag = 0; + const zfs_ioc_vec_t *vec; + char saved_poolname[MAXNAMELEN]; + nvlist_t *innvl = NULL; + + vecnum = cmd - ZFS_IOC_FIRST; + if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) return (-EINVAL); + vec = &zfs_ioc_vec[vecnum]; zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG); error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag); - if (error != 0) + if (error != 0) { error = EFAULT; + goto out; + } - if ((error == 0) && !(flag & FKIOCTL)) - error = zfs_ioc_vec[vec].zvec_secpolicy(zc, CRED()); + zc->zc_iflags = flag & FKIOCTL; + if (zc->zc_nvlist_src_size != 0) { + error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &innvl); + if (error != 0) + goto out; + } /* * Ensure that all pool/dataset names are valid before we pass down to * the lower layers. 
*/ - if (error == 0) { - zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; - zc->zc_iflags = flag & FKIOCTL; - switch (zfs_ioc_vec[vec].zvec_namecheck) { - case POOL_NAME: - if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) - error = EINVAL; + zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; + switch (vec->zvec_namecheck) { + case POOL_NAME: + if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) + error = EINVAL; + else error = pool_status_check(zc->zc_name, - zfs_ioc_vec[vec].zvec_namecheck, - zfs_ioc_vec[vec].zvec_pool_check); - break; + vec->zvec_namecheck, vec->zvec_pool_check); + break; - case DATASET_NAME: - if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) - error = EINVAL; + case DATASET_NAME: + if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) + error = EINVAL; + else error = pool_status_check(zc->zc_name, - zfs_ioc_vec[vec].zvec_namecheck, - zfs_ioc_vec[vec].zvec_pool_check); - break; + vec->zvec_namecheck, vec->zvec_pool_check); + break; - case NO_NAME: - break; - } + case NO_NAME: + break; } - if (error == 0) - error = zfs_ioc_vec[vec].zvec_func(zc); + if (error == 0 && !(flag & FKIOCTL)) + error = vec->zvec_secpolicy(zc, innvl, CRED()); + + if (error != 0) + goto out; + + /* legacy ioctls can modify zc_name */ + (void) strlcpy(saved_poolname, zc->zc_name, sizeof(saved_poolname)); + len = strcspn(saved_poolname, "/@") + 1; + saved_poolname[len] = '\0'; + + if (vec->zvec_func != NULL) { + nvlist_t *outnvl; + int puterror = 0; + spa_t *spa; + nvlist_t *lognv = NULL; + + ASSERT(vec->zvec_legacy_func == NULL); + + /* + * Add the innvl to the lognv before calling the func, + * in case the func changes the innvl. + */ + if (vec->zvec_allow_log) { + lognv = fnvlist_alloc(); + fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL, + vec->zvec_name); + if (!nvlist_empty(innvl)) { + fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL, + innvl); + } + } + + outnvl = fnvlist_alloc(); + error = vec->zvec_func(zc->zc_name, innvl, outnvl); + + if (error == 0 && vec->zvec_allow_log && + spa_open(zc->zc_name, &spa, FTAG) == 0) { + if (!nvlist_empty(outnvl)) { + fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL, + outnvl); + } + (void) spa_history_log_nvl(spa, lognv); + spa_close(spa, FTAG); + } + fnvlist_free(lognv); + + if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) { + int smusherror = 0; + if (vec->zvec_smush_outnvlist) { + smusherror = nvlist_smush(outnvl, + zc->zc_nvlist_dst_size); + } + if (smusherror == 0) + puterror = put_nvlist(zc, outnvl); + } + + if (puterror != 0) + error = puterror; + + nvlist_free(outnvl); + } else { + error = vec->zvec_legacy_func(zc); + } + +out: + nvlist_free(innvl); rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag); - if (error == 0) { - if (rc != 0) - error = EFAULT; - if (zfs_ioc_vec[vec].zvec_his_log) - zfs_log_history(zc); + if (error == 0 && rc != 0) + error = EFAULT; + if (error == 0 && vec->zvec_allow_log) { + char *s = tsd_get(zfs_allow_log_key); + if (s != NULL) + strfree(s); + (void) tsd_set(zfs_allow_log_key, strdup(saved_poolname)); } kmem_free(zc, sizeof (zfs_cmd_t)); @@ -5307,8 +5887,12 @@ zfs_detach(void) list_destroy(&zfsdev_state_list); } -uint_t zfs_fsyncer_key; -extern uint_t rrw_tsd_key; +static void +zfs_allow_log_destroy(void *arg) +{ + char *poolname = arg; + strfree(poolname); +} #ifdef DEBUG #define ZFS_DEBUG_STR " (DEBUG mode)" @@ -5327,11 +5911,14 @@ _init(void) if ((error = zvol_init()) != 0) goto out1; + zfs_ioctl_init(); + if ((error = zfs_attach()) != 0) goto out2; tsd_create(&zfs_fsyncer_key, NULL); - tsd_create(&rrw_tsd_key, 
NULL); + tsd_create(&rrw_tsd_key, rrw_tsd_destroy); + tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy); printk(KERN_NOTICE "ZFS: Loaded module v%s-%s%s, " "ZFS pool version %s, ZFS filesystem version %s\n", @@ -5362,6 +5949,7 @@ _fini(void) tsd_destroy(&zfs_fsyncer_key); tsd_destroy(&rrw_tsd_key); + tsd_destroy(&zfs_allow_log_key); printk(KERN_NOTICE "ZFS: Unloaded module v%s-%s%s\n", ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR); diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index 9ae7ab500942..8fee441b14e5 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -1550,9 +1551,8 @@ zfs_set_version(zfs_sb_t *zsb, uint64_t newvers) sa_register_update_callback(os, zfs_sa_upgrade); } - spa_history_log_internal(LOG_DS_UPGRADE, - dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu", - zsb->z_version, newvers, dmu_objset_id(os)); + spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx, + "from %llu to %llu", zsb->z_version, newvers); dmu_tx_commit(tx); From 4789bcee859d16d736eda80f4a57f6ca7a7deb7b Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Wed, 4 Sep 2013 07:00:57 -0500 Subject: [PATCH 2/2] Illumos #3464 3464 zfs synctask code needs restructuring Reviewed by: Dan Kimmel Reviewed by: Adam Leventhal Reviewed by: George Wilson Reviewed by: Christopher Siden Approved by: Garrett D'Amore References: illumos/illumos-gate@3b2aab18808792cbd248a12f1edf139b89833c13 Ported-by: Tim Chase --- cmd/zdb/zdb.c | 9 +- cmd/zfs/zfs_main.c | 116 +- cmd/zhack/zhack.c | 29 +- cmd/ztest/ztest.c | 196 +- include/libzfs.h | 6 +- include/libzfs_core.h | 4 + include/sys/Makefile.am | 5 +- include/sys/arc.h | 2 +- include/sys/dbuf.h | 9 +- include/sys/dmu.h | 52 +- include/sys/dmu_objset.h | 12 +- include/sys/dmu_send.h | 66 + include/sys/dmu_tx.h | 6 +- include/sys/dsl_dataset.h | 114 +- include/sys/dsl_destroy.h | 52 + include/sys/dsl_dir.h | 17 +- include/sys/dsl_pool.h | 16 +- include/sys/dsl_prop.h | 53 +- include/sys/dsl_synctask.h | 46 +- include/sys/dsl_userhold.h | 57 + include/sys/metaslab.h | 3 +- include/sys/nvpair.h | 1 + include/sys/refcount.h | 5 +- include/sys/rrwlock.h | 7 +- include/sys/spa.h | 2 +- include/sys/space_map.h | 2 + include/sys/txg.h | 9 +- include/sys/zfeature.h | 17 +- include/sys/zfs_context.h | 26 +- include/sys/zfs_debug.h | 12 +- include/sys/zfs_ioctl.h | 4 +- include/sys/zfs_znode.h | 3 +- include/sys/zil.h | 4 +- include/sys/zvol.h | 2 +- lib/libzfs/libzfs_config.c | 4 +- lib/libzfs/libzfs_dataset.c | 354 +-- lib/libzfs/libzfs_diff.c | 8 +- lib/libzfs/libzfs_fru.c | 2 +- lib/libzfs/libzfs_graph.c | 4 +- lib/libzfs/libzfs_import.c | 2 +- lib/libzfs/libzfs_iter.c | 4 +- lib/libzfs/libzfs_pool.c | 56 +- lib/libzfs/libzfs_sendrecv.c | 32 +- lib/libzfs_core/libzfs_core.c | 108 +- lib/libzpool/Makefile.am | 2 + lib/libzpool/kernel.c | 5 + man/man8/zfs.8 | 6 +- module/nvpair/fnvpair.c | 14 + module/zfs/Makefile.in | 2 + module/zfs/arc.c | 10 +- module/zfs/bplist.c | 8 + module/zfs/bpobj.c | 4 + module/zfs/dbuf.c | 85 +- module/zfs/dmu.c | 2 +- module/zfs/dmu_diff.c | 80 +- module/zfs/dmu_objset.c | 861 +++---- module/zfs/dmu_send.c | 916 ++++---- module/zfs/dmu_traverse.c | 42 +- module/zfs/dmu_tx.c | 33 +- module/zfs/dnode.c | 6 +- module/zfs/dnode_sync.c | 1 + module/zfs/dsl_dataset.c | 4143 
+++++++++++---------------------- module/zfs/dsl_deleg.c | 138 +- module/zfs/dsl_destroy.c | 940 ++++++++ module/zfs/dsl_dir.c | 593 ++--- module/zfs/dsl_pool.c | 218 +- module/zfs/dsl_prop.c | 481 ++-- module/zfs/dsl_scan.c | 118 +- module/zfs/dsl_synctask.c | 249 +- module/zfs/dsl_userhold.c | 537 +++++ module/zfs/metaslab.c | 40 + module/zfs/refcount.c | 21 +- module/zfs/rrwlock.c | 41 +- module/zfs/sa.c | 6 +- module/zfs/spa.c | 83 +- module/zfs/spa_history.c | 26 +- module/zfs/spa_misc.c | 19 +- module/zfs/space_map.c | 43 +- module/zfs/txg.c | 37 +- module/zfs/zfs_ctldir.c | 43 +- module/zfs/zfs_ioctl.c | 1104 ++++----- module/zfs/zfs_vfsops.c | 67 +- module/zfs/zil.c | 140 +- module/zfs/zio.c | 8 +- module/zfs/zvol.c | 17 +- module/zpios/pios.c | 9 +- 86 files changed, 6559 insertions(+), 6177 deletions(-) create mode 100644 include/sys/dmu_send.h create mode 100644 include/sys/dsl_destroy.h create mode 100644 include/sys/dsl_userhold.h create mode 100644 module/zfs/dsl_destroy.c create mode 100644 module/zfs/dsl_userhold.c diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 060498ae2c50..b119a16e2f03 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -1725,7 +1725,9 @@ dump_dir(objset_t *os) int print_header = 1; int i, error; + dsl_pool_config_enter(dmu_objset_pool(os), FTAG); dmu_objset_fast_stat(os, &dds); + dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (dds.dds_type < DMU_OST_NUMTYPES) type = objset_types[dds.dds_type]; @@ -2171,7 +2173,6 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, zio_nowait(zio_read(NULL, spa, bp, data, size, zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb)); - } zcb->zcb_readfails = 0; @@ -2365,8 +2366,10 @@ dump_block_stats(spa_t *spa) */ (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj, count_block_cb, &zcb, NULL); - (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj, - count_block_cb, &zcb, NULL); + if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { + (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj, + count_block_cb, &zcb, NULL); + } if (spa_feature_is_active(spa, &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset, diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 7176c94169b5..365f93ba4281 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -892,6 +892,7 @@ typedef struct destroy_cbdata { boolean_t cb_parsable; boolean_t cb_dryrun; nvlist_t *cb_nvl; + nvlist_t *cb_batchedsnaps; /* first snap in contiguous run */ char *cb_firstsnap; @@ -988,9 +989,27 @@ destroy_callback(zfs_handle_t *zhp, void *data) zfs_close(zhp); return (0); } + if (cb->cb_dryrun) { + zfs_close(zhp); + return (0); + } + + /* + * We batch up all contiguous snapshots (even of different + * filesystems) and destroy them with one ioctl. We can't + * simply do all snap deletions and then all fs deletions, + * because we must delete a clone before its origin. + */ + if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) { + fnvlist_add_boolean(cb->cb_batchedsnaps, name); + } else { + int error = zfs_destroy_snaps_nvl(g_zfs, + cb->cb_batchedsnaps, B_FALSE); + fnvlist_free(cb->cb_batchedsnaps); + cb->cb_batchedsnaps = fnvlist_alloc(); - if (!cb->cb_dryrun) { - if (zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 || + if (error != 0 || + zfs_unmount(zhp, NULL, cb->cb_force ? 
MS_FORCE : 0) != 0 || zfs_destroy(zhp, cb->cb_defer_destroy) != 0) { zfs_close(zhp); return (-1); @@ -1146,8 +1165,10 @@ static int zfs_do_destroy(int argc, char **argv) { destroy_cbdata_t cb = { 0 }; + int rv = 0; + int err = 0; int c; - zfs_handle_t *zhp; + zfs_handle_t *zhp = NULL; char *at; zfs_type_t type = ZFS_TYPE_DATASET; @@ -1201,11 +1222,9 @@ zfs_do_destroy(int argc, char **argv) at = strchr(argv[0], '@'); if (at != NULL) { - int err = 0; /* Build the list of snaps to destroy in cb_nvl. */ - if (nvlist_alloc(&cb.cb_nvl, NV_UNIQUE_NAME, 0) != 0) - nomem(); + cb.cb_nvl = fnvlist_alloc(); *at = '\0'; zhp = zfs_open(g_zfs, argv[0], @@ -1216,17 +1235,15 @@ zfs_do_destroy(int argc, char **argv) cb.cb_snapspec = at + 1; if (gather_snapshots(zfs_handle_dup(zhp), &cb) != 0 || cb.cb_error) { - zfs_close(zhp); - nvlist_free(cb.cb_nvl); - return (1); + rv = 1; + goto out; } if (nvlist_empty(cb.cb_nvl)) { (void) fprintf(stderr, gettext("could not find any " "snapshots to destroy; check snapshot names.\n")); - zfs_close(zhp); - nvlist_free(cb.cb_nvl); - return (1); + rv = 1; + goto out; } if (cb.cb_verbose) { @@ -1245,18 +1262,26 @@ zfs_do_destroy(int argc, char **argv) } if (!cb.cb_dryrun) { - if (cb.cb_doclones) + if (cb.cb_doclones) { + cb.cb_batchedsnaps = fnvlist_alloc(); err = destroy_clones(&cb); + if (err == 0) { + err = zfs_destroy_snaps_nvl(g_zfs, + cb.cb_batchedsnaps, B_FALSE); + } + if (err != 0) { + rv = 1; + goto out; + } + } if (err == 0) { - err = zfs_destroy_snaps_nvl(zhp, cb.cb_nvl, + err = zfs_destroy_snaps_nvl(g_zfs, cb.cb_nvl, cb.cb_defer_destroy); } } - zfs_close(zhp); - nvlist_free(cb.cb_nvl); if (err != 0) - return (1); + rv = 1; } else { /* Open the given dataset */ if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL) @@ -1277,8 +1302,8 @@ zfs_do_destroy(int argc, char **argv) zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use 'zpool destroy %s' " "to destroy the pool itself\n"), zfs_get_name(zhp)); - zfs_close(zhp); - return (1); + rv = 1; + goto out; } /* @@ -1288,30 +1313,42 @@ zfs_do_destroy(int argc, char **argv) if (!cb.cb_doclones && zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, &cb) != 0) { - zfs_close(zhp); - return (1); + rv = 1; + goto out; } if (cb.cb_error) { - zfs_close(zhp); - return (1); + rv = 1; + goto out; } + cb.cb_batchedsnaps = fnvlist_alloc(); if (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0) { - zfs_close(zhp); - return (1); + rv = 1; + goto out; } /* * Do the real thing. The callback will close the * handle regardless of whether it succeeds or not. */ - if (destroy_callback(zhp, &cb) != 0) - return (1); + err = destroy_callback(zhp, &cb); + zhp = NULL; + if (err == 0) { + err = zfs_destroy_snaps_nvl(g_zfs, + cb.cb_batchedsnaps, cb.cb_defer_destroy); + } + if (err != 0) + rv = 1; } - return (0); +out: + fnvlist_free(cb.cb_batchedsnaps); + fnvlist_free(cb.cb_nvl); + if (zhp != NULL) + zfs_close(zhp); + return (rv); } static boolean_t @@ -5081,28 +5118,12 @@ zfs_do_allow_unallow_impl(int argc, char **argv, boolean_t un) return (error); } -/* - * zfs allow [-r] [-t] ... - * - * -r Recursively hold - * -t Temporary hold (hidden option) - * - * Apply a user-hold with the given tag to the list of snapshots. - */ static int zfs_do_allow(int argc, char **argv) { return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE)); } -/* - * zfs unallow [-r] [-t] ... - * - * -r Recursively hold - * -t Temporary hold (hidden option) - * - * Apply a user-hold with the given tag to the list of snapshots. 
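As an aside (not part of the patch), the snapshot batching above is easiest to see from the caller's side. A minimal sketch of how a libzfs consumer could use the reworked zfs_destroy_snaps_nvl(), which now takes the libzfs_handle_t rather than a zfs_handle_t; the snapshot names are invented for illustration and g_zfs is the command's global handle:

        nvlist_t *snaps = fnvlist_alloc();

        /* Full snapshot names; filesystems may differ within one batch. */
        fnvlist_add_boolean(snaps, "tank/fs@monday");
        fnvlist_add_boolean(snaps, "tank/other@monday");

        /* B_FALSE: destroy now instead of marking for deferred destroy. */
        if (zfs_destroy_snaps_nvl(g_zfs, snaps, B_FALSE) != 0)
                (void) fprintf(stderr, "batched snapshot destroy failed\n");
        fnvlist_free(snaps);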
- */ static int zfs_do_unallow(int argc, char **argv) { @@ -5116,7 +5137,6 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) int i; const char *tag; boolean_t recursive = B_FALSE; - boolean_t temphold = B_FALSE; const char *opts = holding ? "rt" : "r"; int c; @@ -5126,9 +5146,6 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) case 'r': recursive = B_TRUE; break; - case 't': - temphold = B_TRUE; - break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -5177,7 +5194,7 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) } if (holding) { if (zfs_hold(zhp, delim+1, tag, recursive, - temphold, B_FALSE, -1, 0, 0) != 0) + B_FALSE, -1) != 0) ++errors; } else { if (zfs_release(zhp, delim+1, tag, recursive) != 0) @@ -5193,7 +5210,6 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) * zfs hold [-r] [-t] ... * * -r Recursively hold - * -t Temporary hold (hidden option) * * Apply a user-hold with the given tag to the list of snapshots. */ diff --git a/cmd/zhack/zhack.c b/cmd/zhack/zhack.c index 4f80dde6957b..99d26719450e 100644 --- a/cmd/zhack/zhack.c +++ b/cmd/zhack/zhack.c @@ -46,6 +46,7 @@ #include #include #include +#include #undef ZFS_MAXNAMELEN #include @@ -123,7 +124,7 @@ import_pool(const char *target, boolean_t readonly) spa_t *spa; nvpair_t *elem; nvlist_t *props; - const char *name; + char *name; kernel_init(readonly ? FREAD : (FREAD | FWRITE)); g_zfs = libzfs_init(); @@ -273,10 +274,10 @@ zhack_do_feature_stat(int argc, char **argv) } static void -feature_enable_sync(void *arg1, void *arg2, dmu_tx_t *tx) +feature_enable_sync(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - zfeature_info_t *feature = arg2; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; + zfeature_info_t *feature = arg; spa_feature_enable(spa, feature, tx); spa_history_log_internal(spa, "zhack enable feature", tx, @@ -344,8 +345,8 @@ zhack_do_feature_enable(int argc, char **argv) if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid)) fatal("feature already enabled: %s", feature.fi_guid); - VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL, - feature_enable_sync, spa, &feature, 5)); + VERIFY0(dsl_sync_task(spa_name(spa), NULL, + feature_enable_sync, &feature, 5)); spa_close(spa, FTAG); @@ -353,10 +354,10 @@ zhack_do_feature_enable(int argc, char **argv) } static void -feature_incr_sync(void *arg1, void *arg2, dmu_tx_t *tx) +feature_incr_sync(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - zfeature_info_t *feature = arg2; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; + zfeature_info_t *feature = arg; spa_feature_incr(spa, feature, tx); spa_history_log_internal(spa, "zhack feature incr", tx, @@ -364,10 +365,10 @@ feature_incr_sync(void *arg1, void *arg2, dmu_tx_t *tx) } static void -feature_decr_sync(void *arg1, void *arg2, dmu_tx_t *tx) +feature_decr_sync(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - zfeature_info_t *feature = arg2; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; + zfeature_info_t *feature = arg; spa_feature_decr(spa, feature, tx); spa_history_log_internal(spa, "zhack feature decr", tx, @@ -442,8 +443,8 @@ zhack_do_feature_ref(int argc, char **argv) if (decr && !spa_feature_is_active(spa, &feature)) fatal("feature refcount already 0: %s", feature.fi_guid); - VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL, - decr ? feature_decr_sync : feature_incr_sync, spa, &feature, 5)); + VERIFY0(dsl_sync_task(spa_name(spa), NULL, + decr ? 
feature_decr_sync : feature_incr_sync, &feature, 5)); spa_close(spa, FTAG); } diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 28570a09075c..e192ab17a16b 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -106,10 +106,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -367,7 +369,7 @@ ztest_info_t ztest_info[] = { { ztest_scrub, 1, &zopt_rarely }, { ztest_spa_upgrade, 1, &zopt_rarely }, { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely }, - { ztest_vdev_attach_detach, 1, &zopt_rarely }, + { ztest_vdev_attach_detach, 1, &zopt_sometimes }, { ztest_vdev_LUN_growth, 1, &zopt_rarely }, { ztest_vdev_add_remove, 1, &ztest_opts.zo_vdevtime }, @@ -1031,9 +1033,8 @@ ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value, uint64_t curval; int error; - error = dsl_prop_set(osname, propname, - (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), - sizeof (value), 1, &value); + error = dsl_prop_set_int(osname, propname, + (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value); if (error == ENOSPC) { ztest_record_enospc(FTAG); @@ -1042,8 +1043,7 @@ ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value, ASSERT0(error); setpoint = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); - VERIFY3U(dsl_prop_get(osname, propname, sizeof (curval), - 1, &curval, setpoint), ==, 0); + VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint)); if (ztest_opts.zo_verbose >= 6) { VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0); @@ -2484,8 +2484,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) int error; mutex_enter(&ztest_vdev_lock); - leaves = - MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; + leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); @@ -2507,7 +2506,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) * prevent a race between removing a slog (dmu_objset_find) * and destroying a dataset. Removing the slog will * grab a reference on the dataset which may cause - * dmu_objset_destroy() to fail with EBUSY thus + * dsl_destroy_head() to fail with EBUSY thus * leaving the dataset in an inconsistent state. */ rw_enter(&ztest_name_lock, RW_WRITER); @@ -3196,7 +3195,7 @@ ztest_objset_destroy_cb(const char *name, void *arg) /* * Verify that the dataset contains a directory object. */ - VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os)); + VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os)); error = dmu_object_info(os, ZTEST_DIROBJ, &doi); if (error != ENOENT) { /* We could have crashed in the middle of destroying it */ @@ -3204,12 +3203,16 @@ ztest_objset_destroy_cb(const char *name, void *arg) ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); ASSERT3S(doi.doi_physical_blocks_512, >=, 0); } - dmu_objset_rele(os, FTAG); + dmu_objset_disown(os, FTAG); /* * Destroy the dataset. 
*/ - VERIFY3U(0, ==, dmu_objset_destroy(name, B_FALSE)); + if (strchr(name, '@') != NULL) { + VERIFY0(dsl_destroy_snapshot(name, B_FALSE)); + } else { + VERIFY0(dsl_destroy_head(name)); + } return (0); } @@ -3219,16 +3222,17 @@ ztest_snapshot_create(char *osname, uint64_t id) char snapname[MAXNAMELEN]; int error; - (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, - (u_longlong_t)id); + (void) snprintf(snapname, sizeof (snapname), "%llu", (u_longlong_t)id); - error = dmu_objset_snapshot_one(osname, strchr(snapname, '@') + 1); + error = dmu_objset_snapshot_one(osname, snapname); if (error == ENOSPC) { ztest_record_enospc(FTAG); return (B_FALSE); } - if (error != 0 && error != EEXIST) - fatal(0, "ztest_snapshot_create(%s) = %d", snapname, error); + if (error != 0 && error != EEXIST) { + fatal(0, "ztest_snapshot_create(%s@%s) = %d", osname, + snapname, error); + } return (B_TRUE); } @@ -3241,7 +3245,7 @@ ztest_snapshot_destroy(char *osname, uint64_t id) (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, (u_longlong_t)id); - error = dmu_objset_destroy(snapname, B_FALSE); + error = dsl_destroy_snapshot(snapname, B_FALSE); if (error != 0 && error != ENOENT) fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error); return (B_TRUE); @@ -3269,7 +3273,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) /* * If this dataset exists from a previous run, process its replay log - * half of the time. If we don't replay it, then dmu_objset_destroy() + * half of the time. If we don't replay it, then dsl_destroy_head() * (invoked from ztest_objset_destroy_cb()) should just throw it away. */ if (ztest_random(2) == 0 && @@ -3291,7 +3295,8 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) /* * Verify that the destroyed dataset is no longer in the namespace. */ - VERIFY3U(ENOENT, ==, dmu_objset_hold(name, FTAG, &os)); + VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, + FTAG, &os)); /* * Verify that we can create a new dataset. 
@@ -3305,8 +3310,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_create(%s) = %d", name, error); } - VERIFY3U(0, ==, - dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); + VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); ztest_zd_init(zdtmp, NULL, os); @@ -3396,21 +3400,21 @@ ztest_dsl_dataset_cleanup(char *osname, uint64_t id) (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, (u_longlong_t)id); - error = dmu_objset_destroy(clone2name, B_FALSE); + error = dsl_destroy_head(clone2name); if (error && error != ENOENT) - fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error); - error = dmu_objset_destroy(snap3name, B_FALSE); + fatal(0, "dsl_destroy_head(%s) = %d", clone2name, error); + error = dsl_destroy_snapshot(snap3name, B_FALSE); if (error && error != ENOENT) - fatal(0, "dmu_objset_destroy(%s) = %d", snap3name, error); - error = dmu_objset_destroy(snap2name, B_FALSE); + fatal(0, "dsl_destroy_snapshot(%s) = %d", snap3name, error); + error = dsl_destroy_snapshot(snap2name, B_FALSE); if (error && error != ENOENT) - fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error); - error = dmu_objset_destroy(clone1name, B_FALSE); + fatal(0, "dsl_destroy_snapshot(%s) = %d", snap2name, error); + error = dsl_destroy_head(clone1name); if (error && error != ENOENT) - fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error); - error = dmu_objset_destroy(snap1name, B_FALSE); + fatal(0, "dsl_destroy_head(%s) = %d", clone1name, error); + error = dsl_destroy_snapshot(snap1name, B_FALSE); if (error && error != ENOENT) - fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error); + fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error); umem_free(snap1name, MAXNAMELEN); umem_free(clone1name, MAXNAMELEN); @@ -3425,8 +3429,7 @@ ztest_dsl_dataset_cleanup(char *osname, uint64_t id) void ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) { - objset_t *clone; - dsl_dataset_t *ds; + objset_t *os; char *snap1name; char *clone1name; char *snap2name; @@ -3465,12 +3468,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error); } - error = dmu_objset_hold(snap1name, FTAG, &clone); - if (error) - fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error); - - error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0); - dmu_objset_rele(clone, FTAG); + error = dmu_objset_clone(clone1name, snap1name); if (error) { if (error == ENOSPC) { ztest_record_enospc(FTAG); @@ -3497,12 +3495,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); } - error = dmu_objset_hold(snap3name, FTAG, &clone); - if (error) - fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); - - error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0); - dmu_objset_rele(clone, FTAG); + error = dmu_objset_clone(clone2name, snap3name); if (error) { if (error == ENOSPC) { ztest_record_enospc(FTAG); @@ -3511,14 +3504,14 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); } - error = dsl_dataset_own(snap2name, B_FALSE, FTAG, &ds); + error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os); if (error) - fatal(0, "dsl_dataset_own(%s) = %d", snap2name, error); + fatal(0, "dmu_objset_own(%s) = %d", snap2name, error); error = dsl_dataset_promote(clone2name, NULL); if (error != EBUSY) fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, error); - 
dsl_dataset_disown(ds, FTAG); + dmu_objset_disown(os, FTAG); out: ztest_dsl_dataset_cleanup(osname, id); @@ -4392,7 +4385,7 @@ ztest_zap_parallel(ztest_ds_t *zd, uint64_t id) } count = -1ULL; - VERIFY(zap_count(os, object, &count) == 0); + VERIFY0(zap_count(os, object, &count)); ASSERT(count != -1ULL); /* @@ -4710,6 +4703,22 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) (void) rw_exit(&ztest_name_lock); } +static int +user_release_one(const char *snapname, const char *holdname) +{ + nvlist_t *snaps, *holds; + int error; + + snaps = fnvlist_alloc(); + holds = fnvlist_alloc(); + fnvlist_add_boolean(holds, holdname); + fnvlist_add_nvlist(snaps, snapname, holds); + fnvlist_free(holds); + error = dsl_dataset_user_release(snaps, NULL); + fnvlist_free(snaps); + return (error); +} + /* * Test snapshot hold/release and deferred destroy. */ @@ -4724,22 +4733,30 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) char clonename[100]; char tag[100]; char osname[MAXNAMELEN]; + nvlist_t *holds; (void) rw_enter(&ztest_name_lock, RW_READER); dmu_objset_name(os, osname); - (void) snprintf(snapname, 100, "sh1_%llu", (u_longlong_t)id); - (void) snprintf(fullname, 100, "%s@%s", osname, snapname); - (void) snprintf(clonename, 100, "%s/ch1_%llu",osname,(u_longlong_t)id); - (void) snprintf(tag, 100, "tag_%llu", (u_longlong_t)id); + (void) snprintf(snapname, sizeof (snapname), "sh1_%llu", (long long unsigned int)id); + (void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname); + (void) snprintf(clonename, sizeof (clonename), + "%s/ch1_%llu", osname, (long long unsigned int)id); + (void) snprintf(tag, sizeof (tag), "tag_%llu", (long long unsigned int)id); /* * Clean up from any previous run. */ - (void) dmu_objset_destroy(clonename, B_FALSE); - (void) dsl_dataset_user_release(osname, snapname, tag, B_FALSE); - (void) dmu_objset_destroy(fullname, B_FALSE); + error = dsl_destroy_head(clonename); + if (error != ENOENT) + ASSERT0(error); + error = user_release_one(fullname, tag); + if (error != ESRCH && error != ENOENT) + ASSERT0(error); + error = dsl_destroy_snapshot(fullname, B_FALSE); + if (error != ENOENT) + ASSERT0(error); /* * Create snapshot, clone it, mark snap for deferred destroy, @@ -4754,12 +4771,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); } - error = dmu_objset_hold(fullname, FTAG, &origin); - if (error) - fatal(0, "dmu_objset_hold(%s) = %d", fullname, error); - - error = dmu_objset_clone(clonename, dmu_objset_ds(origin), 0); - dmu_objset_rele(origin, FTAG); + error = dmu_objset_clone(clonename, fullname); if (error) { if (error == ENOSPC) { ztest_record_enospc("dmu_objset_clone"); @@ -4768,15 +4780,15 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_clone(%s) = %d", clonename, error); } - error = dmu_objset_destroy(fullname, B_TRUE); + error = dsl_destroy_snapshot(fullname, B_TRUE); if (error) { - fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d", + fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", fullname, error); } - error = dmu_objset_destroy(clonename, B_FALSE); + error = dsl_destroy_head(clonename); if (error) - fatal(0, "dmu_objset_destroy(%s) = %d", clonename, error); + fatal(0, "dsl_destroy_head(%s) = %d", clonename, error); error = dmu_objset_hold(fullname, FTAG, &origin); if (error != ENOENT) @@ -4796,28 +4808,31 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); } - error = dsl_dataset_user_hold(osname, 
snapname, tag, B_FALSE, - B_TRUE, -1); + holds = fnvlist_alloc(); + fnvlist_add_string(holds, fullname, tag); + error = dsl_dataset_user_hold(holds, 0, NULL); + fnvlist_free(holds); + if (error) fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag); - error = dmu_objset_destroy(fullname, B_FALSE); + error = dsl_destroy_snapshot(fullname, B_FALSE); if (error != EBUSY) { - fatal(0, "dmu_objset_destroy(%s, B_FALSE) = %d", + fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d", fullname, error); } - error = dmu_objset_destroy(fullname, B_TRUE); + error = dsl_destroy_snapshot(fullname, B_TRUE); if (error) { - fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d", + fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", fullname, error); } - error = dsl_dataset_user_release(osname, snapname, tag, B_FALSE); + error = user_release_one(fullname, tag); if (error) - fatal(0, "dsl_dataset_user_release(%s)", fullname, tag); + fatal(0, "user_release_one(%s)", fullname, tag); - VERIFY(dmu_objset_hold(fullname, FTAG, &origin) == ENOENT); + VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT); out: (void) rw_exit(&ztest_name_lock); @@ -4947,7 +4962,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) * prevent a race between offlining a slog and * destroying a dataset. Offlining the slog will * grab a reference on the dataset which may cause - * dmu_objset_destroy() to fail with EBUSY thus + * dsl_destroy_head() to fail with EBUSY thus * leaving the dataset in an inconsistent state. */ if (islog) @@ -5084,8 +5099,12 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) */ for (i = 0; i < copies; i++) { uint64_t offset = i * blocksize; - VERIFY(dmu_buf_hold(os, object, offset, FTAG, &db, - DMU_READ_NO_PREFETCH) == 0); + int error = dmu_buf_hold(os, object, offset, FTAG, &db, + DMU_READ_NO_PREFETCH); + if (error != 0) { + fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u", + os, (long long)object, (long long) offset, error); + } ASSERT(db->db_offset == offset); ASSERT(db->db_size == blocksize); ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) || @@ -5300,6 +5319,7 @@ ztest_spa_import_export(char *oldname, char *newname) nvlist_t *config, *newconfig; uint64_t pool_guid; spa_t *spa; + int error; if (ztest_opts.zo_verbose >= 4) { (void) printf("import/export: old = %s, new = %s\n", @@ -5344,7 +5364,12 @@ ztest_spa_import_export(char *oldname, char *newname) /* * Import it under the new name. */ - VERIFY3U(0, ==, spa_import(newname, config, NULL, 0)); + error = spa_import(newname, config, NULL, 0); + if (error != 0) { + dump_nvlist(config, 0); + fatal(B_FALSE, "couldn't import pool %s as %s: error %u", + oldname, newname, error); + } ztest_walk_pool_directory("pools after import"); @@ -5551,7 +5576,7 @@ ztest_dataset_open(int d) } ASSERT(error == 0 || error == EEXIST); - VERIFY0(dmu_objset_hold(name, zd, &os)); + VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os)); (void) rw_exit(&ztest_name_lock); ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); @@ -5592,7 +5617,7 @@ ztest_dataset_close(int d) ztest_ds_t *zd = &ztest_ds[d]; zil_close(zd->zd_zilog); - dmu_objset_rele(zd->zd_os, zd); + dmu_objset_disown(zd->zd_os, zd); ztest_zd_fini(zd); } @@ -5638,13 +5663,14 @@ ztest_run(ztest_shared_t *zs) * Open our pool. 
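The nvlist shapes built above for ztest are the same ones the new public libzfs_core entry points expect. A rough sketch, not code from the patch, of taking and then releasing a user hold through lzc_hold()/lzc_release(); the pool, snapshot, and tag names are invented, and it assumes libzfs_core_init() has already been called:

        nvlist_t *holds, *release, *tags, *errlist = NULL;
        int error;

        /* lzc_hold(): snapshot name -> hold tag (string). */
        holds = fnvlist_alloc();
        fnvlist_add_string(holds, "tank/fs@snap", "my-tag");
        error = lzc_hold(holds, -1, &errlist); /* -1: no cleanup fd, hold persists */
        fnvlist_free(holds);
        nvlist_free(errlist);
        errlist = NULL;

        /* lzc_release(): snapshot name -> nvlist of hold tags (booleans). */
        release = fnvlist_alloc();
        tags = fnvlist_alloc();
        fnvlist_add_boolean(tags, "my-tag");
        fnvlist_add_nvlist(release, "tank/fs@snap", tags);
        error = lzc_release(release, &errlist);
        fnvlist_free(tags);
        fnvlist_free(release);
        nvlist_free(errlist);   /* per-snapshot failures, if any, come back here */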
*/ kernel_init(FREAD | FWRITE); - VERIFY(spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0); + VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); spa->spa_debug = B_TRUE; ztest_spa = spa; - VERIFY3U(0, ==, dmu_objset_hold(ztest_opts.zo_pool, FTAG, &os)); + VERIFY0(dmu_objset_own(ztest_opts.zo_pool, + DMU_OST_ANY, B_TRUE, FTAG, &os)); zs->zs_guid = dmu_objset_fsid_guid(os); - dmu_objset_rele(os, FTAG); + dmu_objset_disown(os, FTAG); spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; diff --git a/include/libzfs.h b/include/libzfs.h index d51a71d7598b..3826c2cc8338 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -563,7 +563,7 @@ extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t, extern int zfs_create_ancestors(libzfs_handle_t *, const char *); extern int zfs_destroy(zfs_handle_t *, boolean_t); extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t); -extern int zfs_destroy_snaps_nvl(zfs_handle_t *, nvlist_t *, boolean_t); +extern int zfs_destroy_snaps_nvl(libzfs_handle_t *, nvlist_t *, boolean_t); extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *); extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *); extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, @@ -606,8 +606,8 @@ extern int zfs_send(zfs_handle_t *, const char *, const char *, sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); extern int zfs_promote(zfs_handle_t *); -extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t, - boolean_t, boolean_t, int, uint64_t, uint64_t); +extern int zfs_hold(zfs_handle_t *, const char *, const char *, + boolean_t, boolean_t, int); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); diff --git a/include/libzfs_core.h b/include/libzfs_core.h index 9edc884a14d1..f5fd6cda9f0d 100644 --- a/include/libzfs_core.h +++ b/include/libzfs_core.h @@ -46,6 +46,10 @@ int lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist); int lzc_snaprange_space(const char *firstsnap, const char *lastsnap, uint64_t *usedp); +int lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist); +int lzc_release(nvlist_t *holds, nvlist_t **errlist); +int lzc_get_holds(const char *snapname, nvlist_t **holdsp); + int lzc_send(const char *snapname, const char *fromsnap, int fd); int lzc_receive(const char *snapname, nvlist_t *props, const char *origin, boolean_t force, int fd); diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am index 2245ff445564..34c715101054 100644 --- a/include/sys/Makefile.am +++ b/include/sys/Makefile.am @@ -12,6 +12,7 @@ COMMON_H = \ $(top_srcdir)/include/sys/dmu.h \ $(top_srcdir)/include/sys/dmu_impl.h \ $(top_srcdir)/include/sys/dmu_objset.h \ + $(top_srcdir)/include/sys/dmu_send.h \ $(top_srcdir)/include/sys/dmu_traverse.h \ $(top_srcdir)/include/sys/dmu_tx.h \ $(top_srcdir)/include/sys/dmu_zfetch.h \ @@ -19,11 +20,13 @@ COMMON_H = \ $(top_srcdir)/include/sys/dsl_dataset.h \ $(top_srcdir)/include/sys/dsl_deadlist.h \ $(top_srcdir)/include/sys/dsl_deleg.h \ + $(top_srcdir)/include/sys/dsl_destroy.h \ $(top_srcdir)/include/sys/dsl_dir.h \ $(top_srcdir)/include/sys/dsl_pool.h \ $(top_srcdir)/include/sys/dsl_prop.h \ $(top_srcdir)/include/sys/dsl_scan.h \ $(top_srcdir)/include/sys/dsl_synctask.h \ + $(top_srcdir)/include/sys/dsl_userhold.h \ $(top_srcdir)/include/sys/efi_partition.h \ $(top_srcdir)/include/sys/metaslab.h \ 
$(top_srcdir)/include/sys/metaslab_impl.h \ @@ -65,8 +68,8 @@ COMMON_H = \ $(top_srcdir)/include/sys/zfs_sa.h \ $(top_srcdir)/include/sys/zfs_stat.h \ $(top_srcdir)/include/sys/zfs_vfsops.h \ - $(top_srcdir)/include/sys/zfs_znode.h \ $(top_srcdir)/include/sys/zfs_vnops.h \ + $(top_srcdir)/include/sys/zfs_znode.h \ $(top_srcdir)/include/sys/zil.h \ $(top_srcdir)/include/sys/zil_impl.h \ $(top_srcdir)/include/sys/zio_checksum.h \ diff --git a/include/sys/arc.h b/include/sys/arc.h index 67882197a5e1..8c10d947c386 100644 --- a/include/sys/arc.h +++ b/include/sys/arc.h @@ -100,7 +100,7 @@ arc_buf_t *arc_loan_buf(spa_t *spa, int size); void arc_return_buf(arc_buf_t *buf, void *tag); void arc_loan_inuse_buf(arc_buf_t *buf, void *tag); void arc_buf_add_ref(arc_buf_t *buf, void *tag); -int arc_buf_remove_ref(arc_buf_t *buf, void *tag); +boolean_t arc_buf_remove_ref(arc_buf_t *buf, void *tag); int arc_buf_size(arc_buf_t *buf); void arc_release(arc_buf_t *buf, void *tag); int arc_released(arc_buf_t *buf); diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index 394fdfb151d7..8cd1fde01f9c 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -307,20 +307,17 @@ void dbuf_fini(void); boolean_t dbuf_is_metadata(dmu_buf_impl_t *db); -#define DBUF_IS_METADATA(_db) \ - (dbuf_is_metadata(_db)) - #define DBUF_GET_BUFC_TYPE(_db) \ - (DBUF_IS_METADATA(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA) + (dbuf_is_metadata(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA) #define DBUF_IS_CACHEABLE(_db) \ ((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL || \ - (DBUF_IS_METADATA(_db) && \ + (dbuf_is_metadata(_db) && \ ((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA))) #define DBUF_IS_L2CACHEABLE(_db) \ ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL || \ - (DBUF_IS_METADATA(_db) && \ + (dbuf_is_metadata(_db) && \ ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA))) #define DBUF_IS_L2COMPRESSIBLE(_db) \ diff --git a/include/sys/dmu.h b/include/sys/dmu.h index c50df391ecb3..b0db7604da7e 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -214,6 +214,11 @@ typedef enum dmu_object_type { DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE), } dmu_object_type_t; +typedef enum txg_how { + TXG_WAIT = 1, + TXG_NOWAIT, +} txg_how_t; + void byteswap_uint64_array(void *buf, size_t size); void byteswap_uint32_array(void *buf, size_t size); void byteswap_uint16_array(void *buf, size_t size); @@ -252,22 +257,19 @@ void dmu_objset_rele(objset_t *os, void *tag); void dmu_objset_disown(objset_t *os, void *tag); int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp); -int dmu_objset_evict_dbufs(objset_t *os); +void dmu_objset_evict_dbufs(objset_t *os); int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); -int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, - uint64_t flags); -int dmu_objset_destroy(const char *name, boolean_t defer); -int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer, +int dmu_objset_clone(const char *name, const char *origin); +int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer, struct nvlist *errlist); -int dmu_objset_snapshot(struct nvlist *snaps, struct nvlist *, struct nvlist *); int dmu_objset_snapshot_one(const char *fsname, const char *snapname); int dmu_objset_snapshot_tmp(const char *, const char *, int); -int dmu_objset_rename(const char *name, const char *newname, - boolean_t recursive); int dmu_objset_find(char 
*name, int func(const char *, void *), void *arg, int flags); void dmu_objset_byteswap(void *buf, size_t size); +int dsl_dataset_rename_snapshot(const char *fsname, + const char *oldsnapname, const char *newsnapname, boolean_t recursive); typedef struct dmu_buf { uint64_t db_object; /* object that this buffer is part of */ @@ -537,7 +539,7 @@ void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object); void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow); void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size); void dmu_tx_abort(dmu_tx_t *tx); -int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how); +int dmu_tx_assign(dmu_tx_t *tx, enum txg_how txg_how); void dmu_tx_wait(dmu_tx_t *tx); void dmu_tx_commit(dmu_tx_t *tx); @@ -785,36 +787,8 @@ typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp, void dmu_traverse_objset(objset_t *os, uint64_t txg_start, dmu_traverse_cb_t cb, void *arg); -int dmu_send(objset_t *tosnap, objset_t *fromsnap, - int outfd, struct vnode *vp, offset_t *off); -int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep); - -typedef struct dmu_recv_cookie { - /* - * This structure is opaque! - * - * If logical and real are different, we are recving the stream - * into the "real" temporary clone, and then switching it with - * the "logical" target. - */ - struct dsl_dataset *drc_logical_ds; - struct dsl_dataset *drc_real_ds; - struct drr_begin *drc_drrb; - char *drc_tosnap; - char *drc_top_ds; - boolean_t drc_newfs; - boolean_t drc_force; - struct avl_tree *drc_guid_to_ds_map; -} dmu_recv_cookie_t; - -int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *, - boolean_t force, objset_t *origin, dmu_recv_cookie_t *); -int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp, - int cleanup_fd, uint64_t *action_handlep); -int dmu_recv_end(dmu_recv_cookie_t *drc); - -int dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct vnode *vp, - offset_t *off); +int dmu_diff(const char *tosnap_name, const char *fromsnap_name, + struct vnode *vp, offset_t *offp); /* CRC64 table */ #define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index 79d3a6bc05a9..7fe91bebef7e 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -44,6 +44,7 @@ extern "C" { extern krwlock_t os_lock; +struct dsl_pool; struct dsl_dataset; struct dmu_tx; @@ -115,8 +116,6 @@ struct objset { /* stuff we store for the user */ kmutex_t os_user_ptr_lock; void *os_user_ptr; - - /* SA layout/attribute registration */ sa_os_t *os_sa; }; @@ -146,10 +145,10 @@ void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat); void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp); uint64_t dmu_objset_fsid_guid(objset_t *os); -int dmu_objset_find_spa(spa_t *spa, const char *name, - int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags); -int dmu_objset_prefetch(const char *name, void *arg); -int dmu_objset_evict_dbufs(objset_t *os); +int dmu_objset_find_dp(struct dsl_pool *dp, uint64_t ddobj, + int func(struct dsl_pool *, struct dsl_dataset *, void *), + void *arg, int flags); +void dmu_objset_evict_dbufs(objset_t *os); timestruc_t dmu_objset_snap_cmtime(objset_t *os); /* called from dsl */ @@ -165,6 +164,7 @@ void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx); boolean_t dmu_objset_userused_enabled(objset_t *os); 
int dmu_objset_userspace_upgrade(objset_t *os); boolean_t dmu_objset_userspace_present(objset_t *os); +int dmu_fsname(const char *snapname, char *buf); void dmu_objset_init(void); void dmu_objset_fini(void); diff --git a/include/sys/dmu_send.h b/include/sys/dmu_send.h new file mode 100644 index 000000000000..ee0885a60ff6 --- /dev/null +++ b/include/sys/dmu_send.h @@ -0,0 +1,66 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ + +#ifndef _DMU_SEND_H +#define _DMU_SEND_H + +#include +#include + +struct vnode; +struct dsl_dataset; +struct drr_begin; +struct avl_tree; + +int dmu_send(const char *tosnap, const char *fromsnap, int outfd, + struct vnode *vp, offset_t *off); +int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds, + uint64_t *sizep); +int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, + int outfd, struct vnode *vp, offset_t *off); + +typedef struct dmu_recv_cookie { + struct dsl_dataset *drc_ds; + struct drr_begin *drc_drrb; + const char *drc_tofs; + const char *drc_tosnap; + boolean_t drc_newfs; + boolean_t drc_byteswap; + boolean_t drc_force; + struct avl_tree *drc_guid_to_ds_map; + zio_cksum_t drc_cksum; + uint64_t drc_newsnapobj; +} dmu_recv_cookie_t; + +int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, + boolean_t force, char *origin, dmu_recv_cookie_t *drc); +int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp, + int cleanup_fd, uint64_t *action_handlep); +int dmu_recv_end(dmu_recv_cookie_t *drc); + +#endif /* _DMU_SEND_H */ diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h index 40c1ded5de8c..48a507e347a5 100644 --- a/include/sys/dmu_tx.h +++ b/include/sys/dmu_tx.h @@ -22,6 +22,9 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ #ifndef _SYS_DMU_TX_H #define _SYS_DMU_TX_H @@ -133,10 +136,11 @@ extern dmu_tx_stats_t dmu_tx_stats; * These routines are defined in dmu.h, and are called by the user. 
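For orientation (an illustrative sketch, not part of the patch), these declarations describe the usual consumer-side transaction pattern, now spelled with the txg_how_t constants from dmu.h; the objset, object number, and the dmu_tx_hold_bonus() hold are stand-ins for whatever the caller really dirties:

        dmu_tx_t *tx = dmu_tx_create(os);
        int error;

        dmu_tx_hold_bonus(tx, object);          /* declare what this tx will modify */
        error = dmu_tx_assign(tx, TXG_WAIT);    /* TXG_WAIT: block until the tx is assigned */
        if (error != 0) {
                dmu_tx_abort(tx);
                return (error);
        }
        /* ... dirty the held objects here ... */
        dmu_tx_commit(tx);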
*/ dmu_tx_t *dmu_tx_create(objset_t *dd); -int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how); +int dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how); void dmu_tx_commit(dmu_tx_t *tx); void dmu_tx_abort(dmu_tx_t *tx); uint64_t dmu_tx_get_txg(dmu_tx_t *tx); +struct dsl_pool *dmu_tx_pool(dmu_tx_t *tx); void dmu_tx_wait(dmu_tx_t *tx); void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func, diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h index 735ccbbd3058..494f11b90296 100644 --- a/include/sys/dsl_dataset.h +++ b/include/sys/dsl_dataset.h @@ -35,6 +35,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -48,10 +49,8 @@ struct dsl_pool; #define DS_IS_INCONSISTENT(ds) \ ((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) /* - * NB: nopromote can not yet be set, but we want support for it in this - * on-disk version, so that we don't need to upgrade for it later. It - * will be needed when we implement 'zfs split' (where the split off - * clone should not be promoted). + * Note: nopromote can not yet be set, but we want support for it in this + * on-disk version, so that we don't need to upgrade for it later. */ #define DS_FLAG_NOPROMOTE (1ULL<<1) @@ -76,6 +75,8 @@ struct dsl_pool; */ #define DS_FLAG_CI_DATASET (1ULL<<16) +#define DS_CREATE_FLAG_NODIRTY (1ULL<<24) + typedef struct dsl_dataset_phys { uint64_t ds_dir_obj; /* DMU_OT_DSL_DIR */ uint64_t ds_prev_snap_obj; /* DMU_OT_DSL_DATASET */ @@ -125,9 +126,6 @@ typedef struct dsl_dataset { dsl_deadlist_t ds_deadlist; bplist_t ds_pending_deadlist; - /* to protect against multiple concurrent incremental recv */ - kmutex_t ds_recvlock; - /* protected by lock on pool's dp_dirty_datasets list */ txg_node_t ds_dirty_link; list_node_t ds_synced_link; @@ -139,13 +137,15 @@ typedef struct dsl_dataset { kmutex_t ds_lock; objset_t *ds_objset; uint64_t ds_userrefs; + void *ds_owner; /* - * ds_owner is protected by the ds_rwlock and the ds_lock + * Long holds prevent the ds from being destroyed; they allow the + * ds to remain held even after dropping the dp_config_rwlock. + * Owning counts as a long hold. See the comments above + * dsl_pool_hold() for details. */ - krwlock_t ds_rwlock; - kcondvar_t ds_exclusive_cv; - void *ds_owner; + refcount_t ds_longholds; /* no locking; only for making guesses */ uint64_t ds_trysnap_txg; @@ -163,76 +163,42 @@ typedef struct dsl_dataset { char ds_snapname[MAXNAMELEN]; } dsl_dataset_t; -struct dsl_ds_destroyarg { - dsl_dataset_t *ds; /* ds to destroy */ - dsl_dataset_t *rm_origin; /* also remove our origin? */ - boolean_t is_origin_rm; /* set if removing origin snap */ - boolean_t defer; /* destroy -d requested? */ - boolean_t releasing; /* destroying due to release? */ - boolean_t need_prep; /* do we need to retry due to EBUSY? */ -}; - /* * The max length of a temporary tag prefix is the number of hex digits * required to express UINT64_MAX plus one for the hyphen. 
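For context, dsl_dataset_snapshot() declared just below is the sync-task entry point behind creating many snapshots at once: snaps holds full snapshot names, props optionally applies to all of them, and errors optionally collects per-name failures. A small sketch with invented names, passing NULL for both optional arguments as the in-tree callers appear to do:

        nvlist_t *snaps = fnvlist_alloc();
        int error;

        fnvlist_add_boolean(snaps, "tank/a@backup");
        fnvlist_add_boolean(snaps, "tank/b@backup");
        error = dsl_dataset_snapshot(snaps, NULL, NULL);
        fnvlist_free(snaps);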
*/ #define MAX_TAG_PREFIX_LEN 17 -struct dsl_ds_holdarg { - dsl_sync_task_group_t *dstg; - const char *htag; - char *snapname; - boolean_t recursive; - boolean_t gotone; - boolean_t temphold; - char failed[MAXPATHLEN]; -}; - #define dsl_dataset_is_snapshot(ds) \ ((ds)->ds_phys->ds_num_children != 0) #define DS_UNIQUE_IS_ACCURATE(ds) \ (((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0) -int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp); -int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, - void *tag, dsl_dataset_t **); -int dsl_dataset_own(const char *name, boolean_t inconsistentok, +int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag, + dsl_dataset_t **dsp); +int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag, + dsl_dataset_t **); +void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); +int dsl_dataset_own(struct dsl_pool *dp, const char *name, void *tag, dsl_dataset_t **dsp); int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, - boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp); -void dsl_dataset_name(dsl_dataset_t *ds, char *name); -void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); + void *tag, dsl_dataset_t **dsp); void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); -void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag); -boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, - void *tag); -void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag); +void dsl_dataset_name(dsl_dataset_t *ds, char *name); +boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag); void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, minor_t minor); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, uint64_t flags, dmu_tx_t *tx); -int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer); -dsl_checkfunc_t dsl_dataset_destroy_check; -dsl_syncfunc_t dsl_dataset_destroy_sync; -dsl_syncfunc_t dsl_dataset_user_hold_sync; -int dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *, dmu_tx_t *tx); -void dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *, dmu_tx_t *tx); -int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive); +int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors); int dsl_dataset_promote(const char *name, char *conflsnap); -int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, - boolean_t force); -int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, - boolean_t recursive, boolean_t temphold, int cleanup_fd); -int dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, - boolean_t temphold); -int dsl_dataset_user_release(char *dsname, char *snapname, char *htag, - boolean_t recursive); -int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj, - char *htag, boolean_t retry); -int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp); +int dsl_dataset_rename_snapshot(const char *fsname, + const char *oldsnapname, const char *newsnapname, boolean_t recursive); +int dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, + minor_t cleanup_minor, const char *htag); blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds); void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx); @@ -271,13 +237,35 @@ int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char 
*buf); int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv); -int dsl_dataset_set_quota(const char *dsname, zprop_source_t source, +int dsl_dataset_set_refquota(const char *dsname, zprop_source_t source, uint64_t quota); -dsl_syncfunc_t dsl_dataset_set_quota_sync; -int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source, +int dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, uint64_t reservation); -int dsl_destroy_inconsistent(const char *dsname, void *arg); +boolean_t dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier); +void dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag); +void dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag); +boolean_t dsl_dataset_long_held(dsl_dataset_t *ds); + +int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, + dsl_dataset_t *origin_head, boolean_t force); +void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, + dsl_dataset_t *origin_head, dmu_tx_t *tx); +int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname, + dmu_tx_t *tx); +void dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, + dmu_tx_t *tx); + +void dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, + dmu_tx_t *tx); +void dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds); +int dsl_dataset_get_snapname(dsl_dataset_t *ds); +int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, + uint64_t *value); +int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx); +void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, + zprop_source_t source, uint64_t value, dmu_tx_t *tx); +int dsl_dataset_rollback(const char *fsname); #ifdef ZFS_DEBUG #define dprintf_ds(ds, fmt, ...) do { \ diff --git a/include/sys/dsl_destroy.h b/include/sys/dsl_destroy.h new file mode 100644 index 000000000000..c5a70bb90e4c --- /dev/null +++ b/include/sys/dsl_destroy.h @@ -0,0 +1,52 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
+ */ + +#ifndef _SYS_DSL_DESTROY_H +#define _SYS_DSL_DESTROY_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct nvlist; +struct dsl_dataset; +struct dmu_tx; + +int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer, + struct nvlist *errlist); +int dsl_destroy_snapshot(const char *name, boolean_t defer); +int dsl_destroy_head(const char *name); +int dsl_destroy_head_check_impl(struct dsl_dataset *ds, int expected_holds); +void dsl_destroy_head_sync_impl(struct dsl_dataset *ds, struct dmu_tx *tx); +int dsl_destroy_inconsistent(const char *dsname, void *arg); +void dsl_destroy_snapshot_sync_impl(struct dsl_dataset *ds, + boolean_t defer, struct dmu_tx *tx); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DSL_DESTROY_H */ diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h index 65ad202bba2d..2477e89af64a 100644 --- a/include/sys/dsl_dir.h +++ b/include/sys/dsl_dir.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_DIR_H @@ -101,18 +102,15 @@ struct dsl_dir { char dd_myname[MAXNAMELEN]; }; -void dsl_dir_close(dsl_dir_t *dd, void *tag); -int dsl_dir_open(const char *name, void *tag, dsl_dir_t **, const char **tail); -int dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, dsl_dir_t **, - const char **tailp); -int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, +void dsl_dir_rele(dsl_dir_t *dd, void *tag); +int dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag, + dsl_dir_t **, const char **tail); +int dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, const char *tail, void *tag, dsl_dir_t **); void dsl_dir_name(dsl_dir_t *dd, char *buf); int dsl_dir_namelen(dsl_dir_t *dd); uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, dmu_tx_t *tx); -dsl_checkfunc_t dsl_dir_destroy_check; -dsl_syncfunc_t dsl_dir_destroy_sync; void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv); uint64_t dsl_dir_space_available(dsl_dir_t *dd, dsl_dir_t *ancestor, int64_t delta, int ondiskonly); @@ -131,14 +129,15 @@ int dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota); int dsl_dir_set_reservation(const char *ddname, zprop_source_t source, uint64_t reservation); -int dsl_dir_rename(dsl_dir_t *dd, const char *newname); +int dsl_dir_rename(const char *oldname, const char *newname); int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space); -int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx); boolean_t dsl_dir_is_clone(dsl_dir_t *dd); void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds, uint64_t reservation, cred_t *cr, dmu_tx_t *tx); void dsl_dir_snap_cmtime_update(dsl_dir_t *dd); timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd); +void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, + dmu_tx_t *tx); /* internal reserved dir name */ #define MOS_DIR_NAME "$MOS" diff --git a/include/sys/dsl_pool.h b/include/sys/dsl_pool.h index 4a4bf76ef527..51b588e6aecc 100644 --- a/include/sys/dsl_pool.h +++ b/include/sys/dsl_pool.h @@ -36,6 +36,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -129,7 +130,7 @@ typedef struct dsl_pool { * syncing context does not need to ever have it for read, since * nobody else could possibly have it for write. 
*/ - krwlock_t dp_config_rwlock; + rrwlock_t dp_config_rwlock; zfs_all_blkstats_t *dp_blkstats; } dsl_pool_t; @@ -155,15 +156,20 @@ void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx); void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx); void dsl_pool_mos_diduse_space(dsl_pool_t *dp, int64_t used, int64_t comp, int64_t uncomp); +void dsl_pool_config_enter(dsl_pool_t *dp, void *tag); +void dsl_pool_config_exit(dsl_pool_t *dp, void *tag); +boolean_t dsl_pool_config_held(dsl_pool_t *dp); taskq_t *dsl_pool_iput_taskq(dsl_pool_t *dp); -extern int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, - const char *tag, uint64_t *now, dmu_tx_t *tx); -extern int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, +int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, + const char *tag, uint64_t now, dmu_tx_t *tx); +int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag, dmu_tx_t *tx); -extern void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp); +void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp); int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **); +int dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp); +void dsl_pool_rele(dsl_pool_t *dp, void *tag); void dsl_pool_tx_assign_add_usecs(dsl_pool_t *dp, uint64_t usecs); diff --git a/include/sys/dsl_prop.h b/include/sys/dsl_prop.h index b0d9a52cdfd7..5fe18d6a7c55 100644 --- a/include/sys/dsl_prop.h +++ b/include/sys/dsl_prop.h @@ -54,58 +54,47 @@ typedef struct dsl_props_arg { zprop_source_t pa_source; } dsl_props_arg_t; -typedef struct dsl_prop_set_arg { - const char *psa_name; - zprop_source_t psa_source; - int psa_intsz; - int psa_numints; - const void *psa_value; - - /* - * Used to handle the special requirements of the quota and reservation - * properties. 
- */ - uint64_t psa_effective_value; -} dsl_prop_setarg_t; - int dsl_prop_register(struct dsl_dataset *ds, const char *propname, dsl_prop_changed_cb_t *callback, void *cbarg); int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname, dsl_prop_changed_cb_t *callback, void *cbarg); -int dsl_prop_numcb(struct dsl_dataset *ds); +void dsl_prop_notify_all(struct dsl_dir *dd); +boolean_t dsl_prop_hascb(struct dsl_dataset *ds); int dsl_prop_get(const char *ddname, const char *propname, int intsz, int numints, void *buf, char *setpoint); int dsl_prop_get_integer(const char *ddname, const char *propname, uint64_t *valuep, char *setpoint); int dsl_prop_get_all(objset_t *os, nvlist_t **nvp); -int dsl_prop_get_received(objset_t *os, nvlist_t **nvp); +int dsl_prop_get_received(const char *dsname, nvlist_t **nvp); int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname, int intsz, int numints, void *buf, char *setpoint); +int dsl_prop_get_int_ds(struct dsl_dataset *ds, const char *propname, + uint64_t *valuep); int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname, int intsz, int numints, void *buf, char *setpoint, boolean_t snapshot); -dsl_syncfunc_t dsl_props_set_sync; -int dsl_prop_set(const char *ddname, const char *propname, - zprop_source_t source, int intsz, int numints, const void *buf); +void dsl_props_set_sync_impl(struct dsl_dataset *ds, zprop_source_t source, + nvlist_t *props, dmu_tx_t *tx); +void dsl_prop_set_sync_impl(struct dsl_dataset *ds, const char *propname, + zprop_source_t source, int intsz, int numints, const void *value, + dmu_tx_t *tx); int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl); +int dsl_prop_set_int(const char *dsname, const char *propname, + zprop_source_t source, uint64_t value); +int dsl_prop_set_string(const char *dsname, const char *propname, + zprop_source_t source, const char *value); +int dsl_prop_inherit(const char *dsname, const char *propname, + zprop_source_t source); -void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname, - zprop_source_t source, uint64_t *value); -int dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa); -#ifdef ZFS_DEBUG -void dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa); -#define DSL_PROP_CHECK_PREDICTION(dd, psa) \ - dsl_prop_check_prediction((dd), (psa)) -#else -#define DSL_PROP_CHECK_PREDICTION(dd, psa) /* nothing */ -#endif +int dsl_prop_predict(dsl_dir_t *dd, const char *propname, + zprop_source_t source, uint64_t value, uint64_t *newvalp); /* flag first receive on or after SPA_VERSION_RECVD_PROPS */ -boolean_t dsl_prop_get_hasrecvd(objset_t *os); -void dsl_prop_set_hasrecvd(objset_t *os); -void dsl_prop_unset_hasrecvd(objset_t *os); +boolean_t dsl_prop_get_hasrecvd(const char *dsname); +int dsl_prop_set_hasrecvd(const char *dsname); +void dsl_prop_unset_hasrecvd(const char *dsname); void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value); void dsl_prop_nvlist_add_string(nvlist_t *nv, diff --git a/include/sys/dsl_synctask.h b/include/sys/dsl_synctask.h index 9126290cdb5b..ef86fb64cf0c 100644 --- a/include/sys/dsl_synctask.h +++ b/include/sys/dsl_synctask.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #ifndef _SYS_DSL_SYNCTASK_H @@ -34,43 +35,26 @@ extern "C" { struct dsl_pool; -typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *); -typedef void (dsl_syncfunc_t)(void *, void *, dmu_tx_t *); +typedef int (dsl_checkfunc_t)(void *, dmu_tx_t *); +typedef void (dsl_syncfunc_t)(void *, dmu_tx_t *); typedef struct dsl_sync_task { - list_node_t dst_node; + txg_node_t dst_node; + struct dsl_pool *dst_pool; + uint64_t dst_txg; + int dst_space; dsl_checkfunc_t *dst_checkfunc; dsl_syncfunc_t *dst_syncfunc; - void *dst_arg1; - void *dst_arg2; - int dst_err; + void *dst_arg; + int dst_error; + boolean_t dst_nowaiter; } dsl_sync_task_t; -typedef struct dsl_sync_task_group { - txg_node_t dstg_node; - list_t dstg_tasks; - struct dsl_pool *dstg_pool; - uint64_t dstg_txg; - int dstg_err; - int dstg_space; - boolean_t dstg_nowaiter; -} dsl_sync_task_group_t; - -dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp); -void dsl_sync_task_create(dsl_sync_task_group_t *dstg, - dsl_checkfunc_t *, dsl_syncfunc_t *, - void *arg1, void *arg2, int blocks_modified); -int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg); -void dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx); -void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg); -void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx); - -int dsl_sync_task_do(struct dsl_pool *dp, - dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, - void *arg1, void *arg2, int blocks_modified); -void dsl_sync_task_do_nowait(struct dsl_pool *dp, - dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, - void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx); +void dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx); +int dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc, + dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified); +void dsl_sync_task_nowait(struct dsl_pool *dp, dsl_syncfunc_t *syncfunc, + void *arg, int blocks_modified, dmu_tx_t *tx); #ifdef __cplusplus } diff --git a/include/sys/dsl_userhold.h b/include/sys/dsl_userhold.h new file mode 100644 index 000000000000..56c6c8f47a87 --- /dev/null +++ b/include/sys/dsl_userhold.h @@ -0,0 +1,57 @@ + +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
+ */ + +#ifndef _SYS_DSL_USERHOLD_H +#define _SYS_DSL_USERHOLD_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct dsl_pool; +struct dsl_dataset; +struct dmu_tx; + +int dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, + nvlist_t *errlist); +int dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist); +int dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl); +void dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj, + const char *htag); +int dsl_dataset_user_hold_check_one(struct dsl_dataset *ds, const char *htag, + boolean_t temphold, struct dmu_tx *tx); +void dsl_dataset_user_hold_sync_one(struct dsl_dataset *ds, const char *htag, + minor_t minor, uint64_t now, struct dmu_tx *tx); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DSL_USERHOLD_H */ diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h index 99912424b3dd..70f7af0a5480 100644 --- a/include/sys/metaslab.h +++ b/include/sys/metaslab.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_METASLAB_H @@ -57,6 +57,7 @@ extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now); extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg); +extern void metaslab_check_free(spa_t *spa, const blkptr_t *bp); extern void metaslab_fastwrite_mark(spa_t *spa, const blkptr_t *bp); extern void metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp); diff --git a/include/sys/nvpair.h b/include/sys/nvpair.h index c502568a61c9..7a67870455f4 100644 --- a/include/sys/nvpair.h +++ b/include/sys/nvpair.h @@ -285,6 +285,7 @@ void fnvlist_pack_free(char *, size_t); nvlist_t *fnvlist_unpack(char *, size_t); nvlist_t *fnvlist_dup(nvlist_t *); void fnvlist_merge(nvlist_t *, nvlist_t *); +size_t fnvlist_num_pairs(nvlist_t *); void fnvlist_add_boolean(nvlist_t *, const char *); void fnvlist_add_boolean_value(nvlist_t *, const char *, boolean_t); diff --git a/include/sys/refcount.h b/include/sys/refcount.h index 1752c64e3e8b..e767a2389dab 100644 --- a/include/sys/refcount.h +++ b/include/sys/refcount.h @@ -50,15 +50,17 @@ typedef struct reference { typedef struct refcount { kmutex_t rc_mtx; + boolean_t rc_tracked; list_t rc_list; list_t rc_removed; int64_t rc_count; int64_t rc_removed_count; } refcount_t; -/* Note: refcount_t must be initialized with refcount_create() */ +/* Note: refcount_t must be initialized with refcount_create[_untracked]() */ void refcount_create(refcount_t *rc); +void refcount_create_untracked(refcount_t *rc); void refcount_destroy(refcount_t *rc); void refcount_destroy_many(refcount_t *rc, uint64_t number); int refcount_is_zero(refcount_t *rc); @@ -79,6 +81,7 @@ typedef struct refcount { } refcount_t; #define refcount_create(rc) ((rc)->rc_count = 0) +#define refcount_create_untracked(rc) ((rc)->rc_count = 0) #define refcount_destroy(rc) ((rc)->rc_count = 0) #define refcount_destroy_many(rc, number) ((rc)->rc_count = 0) #define refcount_is_zero(rc) ((rc)->rc_count == 0) diff --git a/include/sys/rrwlock.h b/include/sys/rrwlock.h index 8fde3a3beb58..25c8a52467e7 100644 --- a/include/sys/rrwlock.h +++ b/include/sys/rrwlock.h @@ -60,6 +60,7 @@ typedef struct rrwlock { refcount_t rr_anon_rcount; refcount_t rr_linked_rcount; boolean_t rr_writer_wanted; + boolean_t rr_track_all; } 
rrwlock_t; /* @@ -67,15 +68,19 @@ typedef struct rrwlock { * 'tag' must be the same in a rrw_enter() as in its * corresponding rrw_exit(). */ -void rrw_init(rrwlock_t *rrl); +void rrw_init(rrwlock_t *rrl, boolean_t track_all); void rrw_destroy(rrwlock_t *rrl); void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag); +void rrw_enter_read(rrwlock_t *rrl, void *tag); +void rrw_enter_write(rrwlock_t *rrl); void rrw_exit(rrwlock_t *rrl, void *tag); boolean_t rrw_held(rrwlock_t *rrl, krw_t rw); void rrw_tsd_destroy(void *arg); #define RRW_READ_HELD(x) rrw_held(x, RW_READER) #define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER) +#define RRW_LOCK_HELD(x) \ + (rrw_held(x, RW_WRITER) || rrw_held(x, RW_READER)) #ifdef __cplusplus } diff --git a/include/sys/spa.h b/include/sys/spa.h index 1af9137f8d00..401ae8343e96 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -422,7 +422,7 @@ extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot, extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props, nvlist_t *zplprops); extern int spa_import_rootpool(char *devpath, char *devid); -extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props, +extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags); extern nvlist_t *spa_tryimport(nvlist_t *tryconfig); extern int spa_destroy(char *pool); diff --git a/include/sys/space_map.h b/include/sys/space_map.h index 2da80d29b7c9..c53074a00672 100644 --- a/include/sys/space_map.h +++ b/include/sys/space_map.h @@ -149,6 +149,8 @@ extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size); extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size); extern boolean_t space_map_contains(space_map_t *sm, uint64_t start, uint64_t size); +extern space_seg_t *space_map_find(space_map_t *sm, uint64_t start, + uint64_t size, avl_index_t *wherep); extern void space_map_swap(space_map_t **msrc, space_map_t **mdest); extern void space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest); diff --git a/include/sys/txg.h b/include/sys/txg.h index f9d6dd421810..b9bbba8be291 100644 --- a/include/sys/txg.h +++ b/include/sys/txg.h @@ -45,9 +45,6 @@ extern "C" { /* Number of txgs worth of frees we defer adding to in-core spacemaps */ #define TXG_DEFER_SIZE 2 -#define TXG_WAIT 1ULL -#define TXG_NOWAIT 2ULL - typedef struct tx_cpu tx_cpu_t; typedef struct txg_handle { @@ -125,11 +122,11 @@ extern void txg_wait_callbacks(struct dsl_pool *dp); extern void txg_list_create(txg_list_t *tl, size_t offset); extern void txg_list_destroy(txg_list_t *tl); extern boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg); -extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg); -extern int txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg); +extern boolean_t txg_list_add(txg_list_t *tl, void *p, uint64_t txg); +extern boolean_t txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg); extern void *txg_list_remove(txg_list_t *tl, uint64_t txg); extern void *txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg); -extern int txg_list_member(txg_list_t *tl, void *p, uint64_t txg); +extern boolean_t txg_list_member(txg_list_t *tl, void *p, uint64_t txg); extern void *txg_list_head(txg_list_t *tl, uint64_t txg); extern void *txg_list_next(txg_list_t *tl, void *p, uint64_t txg); diff --git a/include/sys/zfeature.h b/include/sys/zfeature.h index 481e85b1bad0..1a081e422d4a 100644 --- a/include/sys/zfeature.h +++ b/include/sys/zfeature.h @@ -26,7 +26,6 @@ #ifndef 
_SYS_ZFEATURE_H #define _SYS_ZFEATURE_H -#include #include #include "zfeature_common.h" @@ -34,14 +33,18 @@ extern "C" { #endif -extern boolean_t feature_is_supported(objset_t *os, uint64_t obj, +struct spa; +struct dmu_tx; +struct objset; + +extern boolean_t feature_is_supported(struct objset *os, uint64_t obj, uint64_t desc_obj, nvlist_t *unsup_feat, nvlist_t *enabled_feat); -struct spa; -extern void spa_feature_create_zap_objects(struct spa *, dmu_tx_t *); -extern void spa_feature_enable(struct spa *, zfeature_info_t *, dmu_tx_t *); -extern void spa_feature_incr(struct spa *, zfeature_info_t *, dmu_tx_t *); -extern void spa_feature_decr(struct spa *, zfeature_info_t *, dmu_tx_t *); +extern void spa_feature_create_zap_objects(struct spa *, struct dmu_tx *); +extern void spa_feature_enable(struct spa *, zfeature_info_t *, + struct dmu_tx *); +extern void spa_feature_incr(struct spa *, zfeature_info_t *, struct dmu_tx *); +extern void spa_feature_decr(struct spa *, zfeature_info_t *, struct dmu_tx *); extern boolean_t spa_feature_is_enabled(struct spa *, zfeature_info_t *); extern boolean_t spa_feature_is_active(struct spa *, zfeature_info_t *); diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 599b97a9b6c9..a126c058ebd9 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -209,8 +209,6 @@ typedef struct kthread { void * t_arg; } kthread_t; -#define tsd_get(key) pthread_getspecific(key) -#define tsd_set(key, val) pthread_setspecific(key, val) #define curthread zk_thread_current() #define thread_exit zk_thread_exit #define thread_create(stk, stksize, func, arg, len, pp, state, pri) \ @@ -284,6 +282,9 @@ typedef int krw_t; #define RW_WRITE_HELD(x) ((x)->rw_wr_owner == curthread) #define RW_LOCK_HELD(x) (RW_READ_HELD(x) || RW_WRITE_HELD(x)) +#undef RW_LOCK_HELD +#define RW_LOCK_HELD(x) (RW_READ_HELD(x) || RW_WRITE_HELD(x)) + extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg); extern void rw_destroy(krwlock_t *rwlp); extern void rw_enter(krwlock_t *rwlp, krw_t rw); @@ -320,6 +324,14 @@ extern void cv_broadcast(kcondvar_t *cv); #define cv_wait_interruptible(cv, mp) cv_wait(cv, mp) #define cv_wait_io(cv, mp) cv_wait(cv, mp) +/* + * Thread-specific data + */ +#define tsd_get(k) pthread_getspecific(k) +#define tsd_set(k, v) pthread_setspecific(k, v) +#define tsd_create(kp, d) pthread_key_create(kp, d) +#define tsd_destroy(kp) /* nothing */ + /* * kstat creation, installation and deletion */ @@ -592,7 +612,7 @@ typedef struct callb_cpr { extern char *kmem_vasprintf(const char *fmt, va_list adx); extern char *kmem_asprintf(const char *fmt, ...); -#define strfree(str) kmem_free((str), strlen(str)+1) +#define strfree(str) kmem_free((str), strlen(str) + 1) /* * Hostname information diff --git a/include/sys/zfs_debug.h b/include/sys/zfs_debug.h index 591d0df8f711..7632d7420c14 100644 --- a/include/sys/zfs_debug.h +++ b/include/sys/zfs_debug.h @@ -43,11 +43,13 @@ extern int zfs_flags; extern int zfs_recover; -#define ZFS_DEBUG_DPRINTF 0x0001 -#define ZFS_DEBUG_DBUF_VERIFY 0x0002 -#define ZFS_DEBUG_DNODE_VERIFY 0x0004 -#define ZFS_DEBUG_SNAPNAMES 0x0008 -#define ZFS_DEBUG_MODIFY 0x0010 +#define ZFS_DEBUG_DPRINTF (1<<0) +#define
ZFS_DEBUG_DBUF_VERIFY (1<<1) +#define ZFS_DEBUG_DNODE_VERIFY (1<<2) +#define ZFS_DEBUG_SNAPNAMES (1<<3) +#define ZFS_DEBUG_MODIFY (1<<4) +#define ZFS_DEBUG_SPA (1<<5) +#define ZFS_DEBUG_ZIO_FREE (1<<6) /* * Always log zfs debug messages to the spl debug subsystem as SS_USER1. diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index 21bfe2b8f9b0..8838322a9a51 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -302,7 +302,6 @@ typedef struct zfs_cmd { uint64_t zc_history; /* really (char *) */ char zc_value[MAXPATHLEN * 2]; char zc_string[MAXNAMELEN]; - char zc_top_ds[MAXPATHLEN]; uint64_t zc_guid; uint64_t zc_nvlist_conf; /* really (char *) */ uint64_t zc_nvlist_conf_size; @@ -352,7 +351,8 @@ extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr); extern int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr); extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr); -extern int zfs_unmount_snap(const char *, void *); +extern void zfs_unmount_snap(const char *); +extern void zfs_destroy_unmount_origin(const char *); enum zfsdev_state_type { ZST_ONEXIT, diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index bdddcc366b8d..aa9d9d288d13 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_FS_ZFS_ZNODE_H @@ -254,7 +255,7 @@ typedef struct znode { */ #define ZFS_ENTER(zsb) \ { \ - rrw_enter(&(zsb)->z_teardown_lock, RW_READER, FTAG); \ + rrw_enter_read(&(zsb)->z_teardown_lock, FTAG); \ if ((zsb)->z_unmounted) { \ ZFS_EXIT(zsb); \ return (EIO); \ diff --git a/include/sys/zil.h b/include/sys/zil.h index 589e28f83752..f3e00101ba3d 100644 --- a/include/sys/zil.h +++ b/include/sys/zil.h @@ -470,8 +470,8 @@ extern int zil_check_log_chain(const char *osname, void *txarg); extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx); extern void zil_clean(zilog_t *zilog, uint64_t synced_txg); -extern int zil_suspend(zilog_t *zilog); -extern void zil_resume(zilog_t *zilog); +extern int zil_suspend(const char *osname, void **cookiep); +extern void zil_resume(void *cookie); extern void zil_add_block(zilog_t *zilog, const blkptr_t *bp); extern int zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp); diff --git a/include/sys/zvol.h b/include/sys/zvol.h index c05f81a5ff54..640538c2cdaa 100644 --- a/include/sys/zvol.h +++ b/include/sys/zvol.h @@ -39,7 +39,7 @@ extern int zvol_get_stats(objset_t *os, nvlist_t *nv); extern boolean_t zvol_is_zvol(const char *); extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); extern int zvol_create_minor(const char *); -extern int zvol_create_minors(const char *); +extern int zvol_create_minors(char *); extern int zvol_remove_minor(const char *); extern void zvol_remove_minors(const char *); extern int zvol_set_volsize(const char *, uint64_t); diff --git a/lib/libzfs/libzfs_config.c b/lib/libzfs/libzfs_config.c index 99b6e67c37c2..41756353378d 100644 --- a/lib/libzfs/libzfs_config.c +++ b/lib/libzfs/libzfs_config.c @@ -106,7 +106,7 @@ namespace_reload(libzfs_handle_t *hdl) nvlist_t *config; config_node_t *cn; nvpair_t *elem; - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; void *cookie; if (hdl->libzfs_ns_gen == 0) { @@ -261,7 +261,7 @@ zpool_get_features(zpool_handle_t *zhp) int zpool_refresh_stats(zpool_handle_t *zhp, boolean_t *missing) { - 
zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; int error; nvlist_t *config; libzfs_handle_t *hdl = zhp->zpool_hdl; diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 041750bca8c3..e43c7c6bdeac 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -313,7 +313,7 @@ get_recvd_props_ioctl(zfs_handle_t *zhp) { libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *recvdprops; - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; int err; if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) @@ -376,7 +376,7 @@ static int get_stats(zfs_handle_t *zhp) { int rc = 0; - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) return (-1); @@ -439,7 +439,7 @@ make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc) zfs_handle_t * make_dataset_handle(libzfs_handle_t *hdl, const char *path) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); @@ -1427,7 +1427,7 @@ zfs_is_namespace_prop(zfs_prop_t prop) int zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; int ret = -1; prop_changelist_t *cl = NULL; char errbuf[1024]; @@ -1553,7 +1553,7 @@ zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval) int zfs_prop_inherit(zfs_handle_t *zhp, const char *propname, boolean_t received) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; int ret; prop_changelist_t *cl; libzfs_handle_t *hdl = zhp->zfs_hdl; @@ -1728,7 +1728,7 @@ static int get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, char **source, uint64_t *val) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; nvlist_t *zplprops = NULL; struct mnttab mnt; char *mntopt_on = NULL; @@ -2002,10 +2002,7 @@ get_clones_cb(zfs_handle_t *zhp, void *arg) NULL, NULL, 0, B_TRUE) != 0) goto out; if (strcmp(gca->buf, gca->origin) == 0) { - if (nvlist_add_boolean(gca->value, zfs_get_name(zhp)) != 0) { - zfs_close(zhp); - return (no_memory(zhp->zfs_hdl)); - } + fnvlist_add_boolean(gca->value, zfs_get_name(zhp)); gca->numclones--; } @@ -2580,7 +2577,7 @@ zfs_prop_get_userquota_common(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue, zfs_userquota_prop_t *typep) { int err; - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); @@ -2640,7 +2637,7 @@ zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue) { int err; - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; const char *snapname; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); @@ -2760,7 +2757,7 @@ static int check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned, boolean_t accept_ancestor, int *prefixlen) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char parent[ZFS_MAXNAMELEN]; char *slash; zfs_handle_t *zhp; @@ -3120,7 +3117,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, int zfs_destroy(zfs_handle_t *zhp, boolean_t defer) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; (void) 
strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); @@ -3200,46 +3197,50 @@ zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer) dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"), zhp->zfs_name, snapname); } else { - ret = zfs_destroy_snaps_nvl(zhp, dd.nvl, defer); + ret = zfs_destroy_snaps_nvl(zhp->zfs_hdl, dd.nvl, defer); } nvlist_free(dd.nvl); return (ret); } /* - * Destroys all the snapshots named in the nvlist. They must be underneath - * the zhp (either snapshots of it, or snapshots of its descendants). + * Destroys all the snapshots named in the nvlist. */ int -zfs_destroy_snaps_nvl(zfs_handle_t *zhp, nvlist_t *snaps, boolean_t defer) +zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer) { int ret; nvlist_t *errlist; + nvpair_t *pair; ret = lzc_destroy_snaps(snaps, defer, &errlist); - if (ret != 0) { - nvpair_t *pair; - for (pair = nvlist_next_nvpair(errlist, NULL); - pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) { - char errbuf[1024]; - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"), - nvpair_name(pair)); + if (ret == 0) + return (0); - switch (fnvpair_value_int32(pair)) { - case EEXIST: - zfs_error_aux(zhp->zfs_hdl, - dgettext(TEXT_DOMAIN, - "snapshot is cloned")); - ret = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, - errbuf); - break; - default: - ret = zfs_standard_error(zhp->zfs_hdl, errno, - errbuf); - break; - } + if (nvlist_next_nvpair(errlist, NULL) == NULL) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot destroy snapshots")); + + ret = zfs_standard_error(hdl, ret, errbuf); + } + for (pair = nvlist_next_nvpair(errlist, NULL); + pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"), + nvpair_name(pair)); + + switch (fnvpair_value_int32(pair)) { + case EEXIST: + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, "snapshot is cloned")); + ret = zfs_error(hdl, EZFS_EXISTS, errbuf); + break; + default: + ret = zfs_standard_error(hdl, errno, errbuf); + break; } } @@ -3388,7 +3389,7 @@ int zfs_promote(zfs_handle_t *zhp) { libzfs_handle_t *hdl = zhp->zfs_hdl; - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char parent[MAXPATHLEN]; char *cp; int ret; @@ -3726,7 +3727,7 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force) { rollback_data_t cb = { 0 }; int err; - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; boolean_t restore_resv = 0; uint64_t old_volsize = 0, new_volsize; zfs_prop_t resv_prop = { 0 }; @@ -3813,7 +3814,7 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, boolean_t force_unmount) { int ret; - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char *delim; prop_changelist_t *cl = NULL; zfs_handle_t *zhrp = NULL; @@ -4032,7 +4033,7 @@ zvol_create_link(libzfs_handle_t *hdl, const char *dataset) static int zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char path[MAXPATHLEN]; int error; @@ -4096,7 +4097,7 @@ zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists) int zvol_remove_link(libzfs_handle_t *hdl, const char *dataset) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = 
{"\0"}; int timeout = 3000; /* in milliseconds */ int error = 0; int i; @@ -4289,7 +4290,7 @@ static int zfs_smb_acl_mgmt(libzfs_handle_t *hdl, char *dataset, char *path, zfs_smb_acl_op_t cmd, char *resource1, char *resource2) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; nvlist_t *nvlist = NULL; int error; @@ -4371,7 +4372,7 @@ int zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type, zfs_userspace_cb_t func, void *arg) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; zfs_useracct_t buf[100]; libzfs_handle_t *hdl = zhp->zfs_hdl; int ret; @@ -4408,37 +4409,83 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type, return (0); } +struct holdarg { + nvlist_t *nvl; + const char *snapname; + const char *tag; + boolean_t recursive; +}; + +static int +zfs_hold_one(zfs_handle_t *zhp, void *arg) +{ + struct holdarg *ha = arg; + zfs_handle_t *szhp; + char name[ZFS_MAXNAMELEN]; + int rv = 0; + + (void) snprintf(name, sizeof (name), + "%s@%s", zhp->zfs_name, ha->snapname); + + szhp = make_dataset_handle(zhp->zfs_hdl, name); + if (szhp) { + fnvlist_add_string(ha->nvl, name, ha->tag); + zfs_close(szhp); + } + + if (ha->recursive) + rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha); + zfs_close(zhp); + return (rv); +} + int zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, - boolean_t recursive, boolean_t temphold, boolean_t enoent_ok, - int cleanup_fd, uint64_t dsobj, uint64_t createtxg) + boolean_t recursive, boolean_t enoent_ok, int cleanup_fd) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + int ret; + struct holdarg ha; + nvlist_t *errors; libzfs_handle_t *hdl = zhp->zfs_hdl; + char errbuf[1024]; + nvpair_t *elem; - ASSERT(!recursive || dsobj == 0); + ha.nvl = fnvlist_alloc(); + ha.snapname = snapname; + ha.tag = tag; + ha.recursive = recursive; + (void) zfs_hold_one(zfs_handle_dup(zhp), &ha); + ret = lzc_hold(ha.nvl, cleanup_fd, &errors); + fnvlist_free(ha.nvl); - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); - if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string)) - >= sizeof (zc.zc_string)) - return (zfs_error(hdl, EZFS_TAGTOOLONG, tag)); - zc.zc_cookie = recursive; - zc.zc_temphold = temphold; - zc.zc_cleanup_fd = cleanup_fd; - zc.zc_sendobj = dsobj; - zc.zc_createtxg = createtxg; + if (ret == 0) + return (0); - if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) { - char errbuf[ZFS_MAXNAMELEN+32]; + if (nvlist_next_nvpair(errors, NULL) == NULL) { + /* no hold-specific errors */ + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot hold")); + switch (ret) { + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded")); + (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + case EINVAL: + (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; + default: + (void) zfs_standard_error(hdl, ret, errbuf); + } + } - /* - * if it was recursive, the one that actually failed will be in - * zc.zc_name. 
- */ - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot hold '%s@%s'"), zc.zc_name, snapname); - switch (errno) { + for (elem = nvlist_next_nvpair(errors, NULL); + elem != NULL; + elem = nvlist_next_nvpair(errors, elem)) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot hold snapshot '%s'"), nvpair_name(elem)); + switch (fnvpair_value_int32(elem)) { case E2BIG: /* * Temporary tags wind up having the ds object id @@ -4446,77 +4493,133 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, * above, it's still possible for the tag to wind * up being slightly too long. */ - return (zfs_error(hdl, EZFS_TAGTOOLONG, errbuf)); - case ENOTSUP: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool must be upgraded")); - return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); + (void) zfs_error(hdl, EZFS_TAGTOOLONG, errbuf); + break; case EINVAL: - return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; case EEXIST: - return (zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf)); + (void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf); + break; case ENOENT: if (enoent_ok) return (ENOENT); /* FALLTHROUGH */ default: - return (zfs_standard_error_fmt(hdl, errno, errbuf)); + (void) zfs_standard_error(hdl, + fnvpair_value_int32(elem), errbuf); } } - return (0); + fnvlist_free(errors); + return (ret); +} + +struct releasearg { + nvlist_t *nvl; + const char *snapname; + const char *tag; + boolean_t recursive; +}; + +static int +zfs_release_one(zfs_handle_t *zhp, void *arg) +{ + struct holdarg *ha = arg; + zfs_handle_t *szhp; + char name[ZFS_MAXNAMELEN]; + int rv = 0; + + (void) snprintf(name, sizeof (name), + "%s@%s", zhp->zfs_name, ha->snapname); + + szhp = make_dataset_handle(zhp->zfs_hdl, name); + if (szhp) { + nvlist_t *holds = fnvlist_alloc(); + fnvlist_add_boolean(holds, ha->tag); + fnvlist_add_nvlist(ha->nvl, name, holds); + zfs_close(szhp); + } + + if (ha->recursive) + rv = zfs_iter_filesystems(zhp, zfs_release_one, ha); + zfs_close(zhp); + return (rv); } int zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, boolean_t recursive) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + int ret; + struct holdarg ha; + nvlist_t *errors; + nvpair_t *elem; libzfs_handle_t *hdl = zhp->zfs_hdl; - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); - if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string)) - >= sizeof (zc.zc_string)) - return (zfs_error(hdl, EZFS_TAGTOOLONG, tag)); - zc.zc_cookie = recursive; + ha.nvl = fnvlist_alloc(); + ha.snapname = snapname; + ha.tag = tag; + ha.recursive = recursive; + (void) zfs_release_one(zfs_handle_dup(zhp), &ha); + ret = lzc_release(ha.nvl, &errors); + fnvlist_free(ha.nvl); - if (zfs_ioctl(hdl, ZFS_IOC_RELEASE, &zc) != 0) { - char errbuf[ZFS_MAXNAMELEN+32]; + if (ret == 0) + return (0); + + if (nvlist_next_nvpair(errors, NULL) == NULL) { + /* no hold-specific errors */ + char errbuf[1024]; - /* - * if it was recursive, the one that actually failed will be in - * zc.zc_name. 
- */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot release '%s' from '%s@%s'"), tag, zc.zc_name, - snapname); + "cannot release")); switch (errno) { - case ESRCH: - return (zfs_error(hdl, EZFS_REFTAG_RELE, errbuf)); case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); - return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); + (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + default: + (void) zfs_standard_error_fmt(hdl, errno, errbuf); + } + } + + for (elem = nvlist_next_nvpair(errors, NULL); + elem != NULL; + elem = nvlist_next_nvpair(errors, elem)) { + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot release hold from snapshot '%s'"), + nvpair_name(elem)); + switch (fnvpair_value_int32(elem)) { + case ESRCH: + (void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf); + break; case EINVAL: - return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; default: - return (zfs_standard_error_fmt(hdl, errno, errbuf)); + (void) zfs_standard_error_fmt(hdl, + fnvpair_value_int32(elem), errbuf); } } - return (0); + fnvlist_free(errors); + return (ret); } int zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; int nvsz = 2048; void *nvbuf; int err = 0; - char errbuf[ZFS_MAXNAMELEN+32]; + char errbuf[1024]; assert(zhp->zfs_type == ZFS_TYPE_VOLUME || zhp->zfs_type == ZFS_TYPE_FILESYSTEM); @@ -4578,10 +4681,10 @@ zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl) int zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; char *nvbuf; - char errbuf[ZFS_MAXNAMELEN+32]; + char errbuf[1024]; size_t nvsz; int err; @@ -4632,38 +4735,18 @@ zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) int zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; - libzfs_handle_t *hdl = zhp->zfs_hdl; - int nvsz = 2048; - void *nvbuf; - int err = 0; - char errbuf[ZFS_MAXNAMELEN+32]; - - assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); - -tryagain: - - nvbuf = malloc(nvsz); - if (nvbuf == NULL) { - err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno))); - goto out; - } + int err; + char errbuf[1024]; - zc.zc_nvlist_dst_size = nvsz; - zc.zc_nvlist_dst = (uintptr_t)nvbuf; + err = lzc_get_holds(zhp->zfs_name, nvl); - (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN); + if (err != 0) { + libzfs_handle_t *hdl = zhp->zfs_hdl; - if (zfs_ioctl(hdl, ZFS_IOC_GET_HOLDS, &zc) != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), - zc.zc_name); - switch (errno) { - case ENOMEM: - free(nvbuf); - nvsz = zc.zc_nvlist_dst_size; - goto tryagain; - + zhp->zfs_name); + switch (err) { case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); @@ -4679,19 +4762,8 @@ zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl) err = zfs_standard_error_fmt(hdl, errno, errbuf); break; } - } else { - /* success */ - int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0); - if (rc) { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), - zc.zc_name); - err = zfs_standard_error_fmt(hdl, rc, errbuf); - } } - free(nvbuf); -out: return (err); } diff --git a/lib/libzfs/libzfs_diff.c 
b/lib/libzfs/libzfs_diff.c index d8ef6ff025b1..7472d246b012 100644 --- a/lib/libzfs/libzfs_diff.c +++ b/lib/libzfs/libzfs_diff.c @@ -90,7 +90,7 @@ static int get_stats_for_obj(differ_info_t *di, const char *dsname, uint64_t obj, char *pn, int maxlen, zfs_stat_t *sb) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; int error; (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name)); @@ -379,7 +379,7 @@ describe_free(FILE *fp, differ_info_t *di, uint64_t object, char *namebuf, static int write_free_diffs(FILE *fp, differ_info_t *di, dmu_diff_record_t *dr) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; libzfs_handle_t *lhdl = di->zhp->zfs_hdl; char fobjname[MAXPATHLEN]; @@ -507,7 +507,7 @@ static int make_temp_snapshot(differ_info_t *di) { libzfs_handle_t *hdl = di->zhp->zfs_hdl; - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; (void) snprintf(zc.zc_value, sizeof (zc.zc_value), ZDIFF_PREFIX, getpid()); @@ -749,7 +749,7 @@ int zfs_show_diffs(zfs_handle_t *zhp, int outfd, const char *fromsnap, const char *tosnap, int flags) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char errbuf[1024]; differ_info_t di = { 0 }; pthread_t tid; diff --git a/lib/libzfs/libzfs_fru.c b/lib/libzfs/libzfs_fru.c index aa84aa30deb5..4e2fe9d07bfc 100644 --- a/lib/libzfs/libzfs_fru.c +++ b/lib/libzfs/libzfs_fru.c @@ -361,7 +361,7 @@ libzfs_fru_devpath(libzfs_handle_t *hdl, const char *fru) int zpool_fru_set(zpool_handle_t *zhp, uint64_t vdev_guid, const char *fru) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); (void) strncpy(zc.zc_value, fru, sizeof (zc.zc_value)); diff --git a/lib/libzfs/libzfs_graph.c b/lib/libzfs/libzfs_graph.c index 3c5bdcc6767c..63d9138ef33f 100644 --- a/lib/libzfs/libzfs_graph.c +++ b/lib/libzfs/libzfs_graph.c @@ -379,7 +379,7 @@ zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source, static int iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; zfs_vertex_t *zvp; /* @@ -473,7 +473,7 @@ iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) static boolean_t external_dependents(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; /* * Check whether this dataset is a clone or has clones since diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c index af6a43d8388f..53609f2cb73a 100644 --- a/lib/libzfs/libzfs_import.c +++ b/lib/libzfs/libzfs_import.c @@ -365,7 +365,7 @@ static nvlist_t * refresh_config(libzfs_handle_t *hdl, nvlist_t *config) { nvlist_t *nvl; - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; int err; if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) diff --git a/lib/libzfs/libzfs_iter.c b/lib/libzfs/libzfs_iter.c index ff76f9f367e2..3d8bc5e14803 100644 --- a/lib/libzfs/libzfs_iter.c +++ b/lib/libzfs/libzfs_iter.c @@ -103,7 +103,7 @@ zfs_do_list_ioctl(zfs_handle_t *zhp, int arg, zfs_cmd_t *zc) int zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; zfs_handle_t *nzhp; int ret; @@ -140,7 +140,7 @@ 
int zfs_iter_snapshots(zfs_handle_t *zhp, boolean_t simple, zfs_iter_f func, void *data) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; zfs_handle_t *nzhp; int ret; diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 45c39cc0facd..e8e7efca5c08 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -64,7 +64,7 @@ typedef struct prop_flags { static int zpool_get_all_props(zpool_handle_t *zhp) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); @@ -692,7 +692,7 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, int zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; int ret = -1; char errbuf[1024]; nvlist_t *nvl = NULL; @@ -1141,7 +1141,7 @@ int zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, nvlist_t *props, nvlist_t *fsprops) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; nvlist_t *zc_fsprops = NULL; nvlist_t *zc_props = NULL; char msg[1024]; @@ -1275,7 +1275,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, int zpool_destroy(zpool_handle_t *zhp, const char *log_str) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; zfs_handle_t *zfp = NULL; libzfs_handle_t *hdl = zhp->zpool_hdl; char msg[1024]; @@ -1319,7 +1319,7 @@ zpool_destroy(zpool_handle_t *zhp, const char *log_str) int zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; int ret; libzfs_handle_t *hdl = zhp->zpool_hdl; char msg[1024]; @@ -1446,7 +1446,7 @@ static int zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce, const char *log_str) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char msg[1024]; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, @@ -1721,7 +1721,7 @@ int zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, nvlist_t *props, int flags) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; zpool_rewind_policy_t policy; nvlist_t *nv = NULL; nvlist_t *nvinfo = NULL; @@ -1913,7 +1913,7 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, int zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; @@ -2389,7 +2389,7 @@ int zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, vdev_state_t *newstate) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char msg[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache, islog; @@ -2473,7 +2473,7 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, int zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char msg[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache; @@ -2523,7 +2523,7 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp) int zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, 
vdev_aux_t aux) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; @@ -2558,7 +2558,7 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) int zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; @@ -2612,7 +2612,7 @@ int zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char msg[1024]; int ret; nvlist_t *tgt; @@ -2788,7 +2788,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, int zpool_vdev_detach(zpool_handle_t *zhp, const char *path) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char msg[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache; @@ -2886,7 +2886,7 @@ int zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot, nvlist_t *props, splitflags_t flags) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char msg[1024]; nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL; nvlist_t **varray = NULL, *zc_props = NULL; @@ -3097,7 +3097,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot, int zpool_vdev_remove(zpool_handle_t *zhp, const char *path) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char msg[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache, islog; @@ -3142,7 +3142,7 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path) int zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char msg[1024]; nvlist_t *tgt; zpool_rewind_policy_t policy; @@ -3218,7 +3218,7 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl) int zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; @@ -3244,7 +3244,7 @@ zpool_reguid(zpool_handle_t *zhp) { char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name); @@ -3262,7 +3262,7 @@ zpool_reguid(zpool_handle_t *zhp) int zpool_reopen(zpool_handle_t *zhp) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; @@ -3342,7 +3342,7 @@ path_to_devid(const char *path) static void set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value)); @@ -3517,7 +3517,7 @@ zbookmark_compare(const void *a, const void *b) int zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; uint64_t count; zbookmark_t *zb = NULL; int i; @@ -3613,7 +3613,7 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) int 
zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strcpy(zc.zc_name, zhp->zpool_name); @@ -3641,7 +3641,7 @@ zfs_save_arguments(int argc, char **argv, char *string, int len) int zpool_log_history(libzfs_handle_t *hdl, const char *message) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; nvlist_t *args; int err; @@ -3667,7 +3667,7 @@ zpool_log_history(libzfs_handle_t *hdl, const char *message) static int get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); @@ -3804,7 +3804,7 @@ int zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp, int *dropped, int block, int cleanup_fd) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; int error = 0; *nvp = NULL; @@ -3863,7 +3863,7 @@ zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp, int zpool_events_clear(libzfs_handle_t *hdl, int *count) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char msg[1024]; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, @@ -3882,7 +3882,7 @@ void zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, char *pathname, size_t len) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; boolean_t mounted = B_FALSE; char *mntpnt = NULL; char dsname[MAXNAMELEN]; diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 5d0ab0eb4b71..28751b215d2c 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -812,7 +812,7 @@ static int estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj, boolean_t fromorigin, uint64_t *sizep) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); @@ -876,7 +876,7 @@ static int dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj, boolean_t fromorigin, int outfd, nvlist_t *debugnv) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *thisdbg; @@ -978,9 +978,7 @@ hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd) */ if (pzhp) { error = zfs_hold(pzhp, thissnap, sdd->holdtag, - B_FALSE, B_TRUE, B_TRUE, sdd->cleanup_fd, - zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID), - zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG)); + B_FALSE, B_TRUE, sdd->cleanup_fd); zfs_close(pzhp); } @@ -992,7 +990,7 @@ send_progress_thread(void *arg) { progress_arg_t *pa = arg; - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; zfs_handle_t *zhp = pa->pa_zhp; libzfs_handle_t *hdl = zhp->zfs_hdl; unsigned long long bytes; @@ -1195,7 +1193,7 @@ dump_filesystem(zfs_handle_t *zhp, void *arg) int rv = 0; send_dump_data_t *sdd = arg; boolean_t missingfrom = B_FALSE; - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", zhp->zfs_name, sdd->tosnap); @@ -1683,7 +1681,7 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, int baselen, char *newname, recvflags_t *flags) { static int seq; - 
zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; int err; prop_changelist_t *clp; zfs_handle_t *zhp; @@ -1719,12 +1717,11 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, err = ENOENT; } - if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) { + if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) { seq++; - (void) strncpy(newname, name, baselen); - (void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen, - "recv-%ld-%u", (long) getpid(), seq); + (void) snprintf(newname, ZFS_MAXNAMELEN, "%.*srecv-%u-%u", + baselen, name, getpid(), seq); (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value)); if (flags->verbose) { @@ -1756,7 +1753,7 @@ static int recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen, char *newname, recvflags_t *flags) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; int err = 0; prop_changelist_t *clp; zfs_handle_t *zhp; @@ -2015,7 +2012,7 @@ recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, stream_originguid, originguid)) { case 1: { /* promote it! */ - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; nvlist_t *origin_nvfs; char *origin_fsname; @@ -2087,7 +2084,7 @@ recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops", &props) && 0 == nvlist_lookup_nvlist(props, stream_snapname, &props)) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; zc.zc_cookie = B_TRUE; /* received */ (void) snprintf(zc.zc_name, sizeof (zc.zc_name), @@ -2518,7 +2515,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; time_t begin_time; int ioctl_err, ioctl_errno, err; char *cp; @@ -2649,7 +2646,6 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, /* * Determine name of destination snapshot, store in zc_value. 
*/ - (void) strcpy(zc.zc_top_ds, tosnap); (void) strcpy(zc.zc_value, tosnap); (void) strlcat(zc.zc_value, chopprefix, sizeof (zc.zc_value)); free(cp); @@ -2892,7 +2888,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, zcmd_free_nvlists(&zc); if (err == 0 && snapprops_nvlist) { - zfs_cmd_t zc2 = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc2 = {"\0"}; (void) strcpy(zc2.zc_name, zc.zc_value); zc2.zc_cookie = B_TRUE; /* received */ diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c index bff6902caaf9..44a2070d6028 100644 --- a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -118,7 +118,7 @@ static int lzc_ioctl(zfs_ioc_t ioc, const char *name, nvlist_t *source, nvlist_t **resultp) { - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; int error = 0; char *packed; size_t size; @@ -132,6 +132,7 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name, zc.zc_nvlist_src_size = size; if (resultp != NULL) { + *resultp = NULL; zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024); zc.zc_nvlist_dst = (uint64_t)(uintptr_t) malloc(zc.zc_nvlist_dst_size); @@ -159,8 +160,6 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name, if (zc.zc_nvlist_dst_filled) { *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, zc.zc_nvlist_dst_size); - } else if (resultp != NULL) { - *resultp = NULL; } out: @@ -209,7 +208,7 @@ lzc_clone(const char *fsname, const char *origin, * The value will be the (int32) error code. * * The return value will be 0 if all snapshots were created, otherwise it will - * be the errno of a (undetermined) snapshot that failed. + * be the errno of a (unspecified) snapshot that failed. */ int lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) @@ -258,7 +257,7 @@ lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) * The return value will be 0 if all snapshots were destroyed (or marked for * later destruction if 'defer' is set) or didn't exist to begin with. * - * Otherwise the return value will be the errno of a (undetermined) snapshot + * Otherwise the return value will be the errno of a (unspecified) snapshot * that failed, no snapshots will be destroyed, and the errlist will have an * entry for each snapshot that failed. The value in the errlist will be * the (int32) error code. @@ -326,12 +325,107 @@ lzc_exists(const char *dataset) * The objset_stats ioctl is still legacy, so we need to construct our * own zfs_cmd_t rather than using zfsc_ioctl(). */ - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0); } +/* + * Create "user holds" on snapshots. If there is a hold on a snapshot, + * the snapshot can not be destroyed. (However, it can be marked for deletion + * by lzc_destroy_snaps(defer=B_TRUE).) + * + * The keys in the nvlist are snapshot names. + * The snapshots must all be in the same pool. + * The value is the name of the hold (string type). + * + * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL). + * In this case, when the cleanup_fd is closed (including on process + * termination), the holds will be released. If the system is shut down + * uncleanly, the holds will be released when the pool is next opened + * or imported. + * + * The return value will be 0 if all holds were created. 
Otherwise the return + * value will be the errno of a (unspecified) hold that failed, no holds will + * be created, and the errlist will have an entry for each hold that + * failed (name = snapshot). The value in the errlist will be the error + * code (int32). + */ +int +lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) +{ + char pool[MAXNAMELEN]; + nvlist_t *args; + nvpair_t *elem; + int error; + + /* determine the pool name */ + elem = nvlist_next_nvpair(holds, NULL); + if (elem == NULL) + return (0); + (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); + pool[strcspn(pool, "/@")] = '\0'; + + args = fnvlist_alloc(); + fnvlist_add_nvlist(args, "holds", holds); + if (cleanup_fd != -1) + fnvlist_add_int32(args, "cleanup_fd", cleanup_fd); + + error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist); + nvlist_free(args); + return (error); +} + +/* + * Release "user holds" on snapshots. If the snapshot has been marked for + * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have + * any clones, and all the user holds are removed, then the snapshot will be + * destroyed. + * + * The keys in the nvlist are snapshot names. + * The snapshots must all be in the same pool. + * The value is a nvlist whose keys are the holds to remove. + * + * The return value will be 0 if all holds were removed. + * Otherwise the return value will be the errno of a (unspecified) release + * that failed, no holds will be released, and the errlist will have an + * entry for each snapshot that has failed releases (name = snapshot). + * The value in the errlist will be the error code (int32) of a failed release. + */ +int +lzc_release(nvlist_t *holds, nvlist_t **errlist) +{ + char pool[MAXNAMELEN]; + nvpair_t *elem; + + /* determine the pool name */ + elem = nvlist_next_nvpair(holds, NULL); + if (elem == NULL) + return (0); + (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); + pool[strcspn(pool, "/@")] = '\0'; + + return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist)); +} + +/* + * Retrieve list of user holds on the specified snapshot. + * + * On success, *holdsp will be set to a nvlist which the caller must free. + * The keys are the names of the holds, and the value is the creation time + * of the hold (uint64) in seconds since the epoch. + */ +int +lzc_get_holds(const char *snapname, nvlist_t **holdsp) +{ + int error; + nvlist_t *innvl = fnvlist_alloc(); + error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp); + fnvlist_free(innvl); + return (error); +} + /* * If fromsnap is NULL, a full (non-incremental) stream will be sent. */ @@ -411,7 +505,7 @@ lzc_receive(const char *snapname, nvlist_t *props, const char *origin, * The receive ioctl is still legacy, so we need to construct our own * zfs_cmd_t rather than using zfsc_ioctl(). 
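A minimal userland sketch of how the lzc_* hold calls documented above fit together, paired with an lzc_snapshot() call to create the snapshot being held. The dataset name "tank/fs@backup", the "my-hold" tag, and the hold_example() helper are invented for illustration, it is assumed that libzfs_core_init() has already been called, and error handling is abbreviated.

#include <libzfs_core.h>
#include <libnvpair.h>

static int
hold_example(void)
{
        nvlist_t *snaps = fnvlist_alloc();
        nvlist_t *holds = fnvlist_alloc();
        nvlist_t *tags = fnvlist_alloc();
        nvlist_t *release = fnvlist_alloc();
        nvlist_t *errlist = NULL;
        int err;

        /* Snapshot names are the keys; all must be in the same pool. */
        fnvlist_add_boolean(snaps, "tank/fs@backup");
        err = lzc_snapshot(snaps, NULL, NULL);

        if (err == 0) {
                /* Key = snapshot name, value = hold tag (string). */
                fnvlist_add_string(holds, "tank/fs@backup", "my-hold");
                err = lzc_hold(holds, -1, &errlist);    /* -1: no cleanup fd */
        }

        if (err == 0) {
                nvlist_t *cur = NULL;

                /* cur maps hold names to creation time (uint64). */
                if (lzc_get_holds("tank/fs@backup", &cur) == 0)
                        fnvlist_free(cur);

                /* Key = snapshot, value = nvlist naming the holds to drop. */
                fnvlist_add_boolean(tags, "my-hold");
                fnvlist_add_nvlist(release, "tank/fs@backup", tags);
                err = lzc_release(release, NULL);
        }

        fnvlist_free(snaps);
        fnvlist_free(holds);
        fnvlist_free(tags);
        fnvlist_free(release);
        if (errlist != NULL)
                nvlist_free(errlist);
        return (err);
}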
*/ - zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + zfs_cmd_t zc = {"\0"}; char *atp; char *packed = NULL; size_t size; diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index cbba38896f27..637dc15da893 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -45,6 +45,8 @@ libzpool_la_SOURCES = \ $(top_srcdir)/module/zfs/dsl_prop.c \ $(top_srcdir)/module/zfs/dsl_scan.c \ $(top_srcdir)/module/zfs/dsl_synctask.c \ + $(top_srcdir)/module/zfs/dsl_destroy.c \ + $(top_srcdir)/module/zfs/dsl_userhold.c \ $(top_srcdir)/module/zfs/fm.c \ $(top_srcdir)/module/zfs/gzip.c \ $(top_srcdir)/module/zfs/lzjb.c \ diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 0293b5eb5a2d..e4d645cf7797 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -1042,6 +1043,8 @@ umem_out_of_memory(void) void kernel_init(int mode) { + extern uint_t rrw_tsd_key; + umem_nofail_callback(umem_out_of_memory); physmem = sysconf(_SC_PHYS_PAGES); @@ -1059,6 +1062,8 @@ kernel_init(int mode) system_taskq_init(); spa_init(mode); + + tsd_create(&rrw_tsd_key, rrw_tsd_destroy); } void diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index bd8bbfdcf69a..2067437dd7ce 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -1511,7 +1511,9 @@ Destroy (or mark for deferred destruction) all snapshots with this name in desce .ad .sp .6 .RS 4n -Recursively destroy all dependents. +Recursively destroy all clones of these snapshots, including the clones, +snapshots, and children. If this flag is specified, the \fB-d\fR flag will +have no effect. .RE .sp @@ -1547,7 +1549,7 @@ Print verbose information about the deleted data. .RE .sp -Extreme care should be taken when applying either the \fB-r\fR or the \fB-f\fR +Extreme care should be taken when applying either the \fB-r\fR or the \fB-R\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use. 
.RE diff --git a/module/nvpair/fnvpair.c b/module/nvpair/fnvpair.c index 7faea0fceb9a..a91b9524d8a0 100644 --- a/module/nvpair/fnvpair.c +++ b/module/nvpair/fnvpair.c @@ -26,6 +26,7 @@ #include #include #include +#include #ifndef _KERNEL #include #endif @@ -114,6 +115,18 @@ fnvlist_merge(nvlist_t *dst, nvlist_t *src) VERIFY0(nvlist_merge(dst, src, KM_SLEEP)); } +size_t +fnvlist_num_pairs(nvlist_t *nvl) +{ + size_t count = 0; + nvpair_t *pair; + + for (pair = nvlist_next_nvpair(nvl, 0); pair != NULL; + pair = nvlist_next_nvpair(nvl, pair)) + count++; + return (count); +} + void fnvlist_add_boolean(nvlist_t *nvl, const char *name) { @@ -563,5 +576,6 @@ EXPORT_SYMBOL(fnvpair_value_int64); EXPORT_SYMBOL(fnvpair_value_uint64); EXPORT_SYMBOL(fnvpair_value_string); EXPORT_SYMBOL(fnvpair_value_nvlist); +EXPORT_SYMBOL(fnvlist_num_pairs); #endif diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in index 81b1680e4132..e71228454110 100644 --- a/module/zfs/Makefile.in +++ b/module/zfs/Makefile.in @@ -93,3 +93,5 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/zpl_super.o $(MODULE)-objs += @top_srcdir@/module/zfs/zpl_xattr.o $(MODULE)-objs += @top_srcdir@/module/zfs/zrlock.o $(MODULE)-objs += @top_srcdir@/module/zfs/zvol.o +$(MODULE)-objs += @top_srcdir@/module/zfs/dsl_destroy.o +$(MODULE)-objs += @top_srcdir@/module/zfs/dsl_userhold.o diff --git a/module/zfs/arc.c b/module/zfs/arc.c index ce4a0239c0c0..1298c5b91bc6 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -1643,12 +1643,12 @@ arc_buf_free(arc_buf_t *buf, void *tag) } } -int +boolean_t arc_buf_remove_ref(arc_buf_t *buf, void* tag) { arc_buf_hdr_t *hdr = buf->b_hdr; kmutex_t *hash_lock = NULL; - int no_callback = (buf->b_efunc == NULL); + boolean_t no_callback = (buf->b_efunc == NULL); if (hdr->b_state == arc_anon) { ASSERT(hdr->b_datacnt == 1); @@ -1854,7 +1854,7 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle, ARCSTAT_INCR(arcstat_mutex_miss, missed); /* - * We have just evicted some date into the ghost state, make + * We have just evicted some data into the ghost state, make * sure we also adjust the ghost state size if necessary. */ if (arc_no_grow && @@ -2772,7 +2772,7 @@ arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg) { if (zio == NULL || zio->io_error == 0) bcopy(buf->b_data, arg, buf->b_hdr->b_size); - VERIFY(arc_buf_remove_ref(buf, arg) == 1); + VERIFY(arc_buf_remove_ref(buf, arg)); } /* a generic arc_done_func_t */ @@ -2781,7 +2781,7 @@ arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg) { arc_buf_t **bufp = arg; if (zio && zio->io_error) { - VERIFY(arc_buf_remove_ref(buf, arg) == 1); + VERIFY(arc_buf_remove_ref(buf, arg)); *bufp = NULL; } else { *bufp = buf; diff --git a/module/zfs/bplist.c b/module/zfs/bplist.c index d196351dcf96..c3927e74a2d8 100644 --- a/module/zfs/bplist.c +++ b/module/zfs/bplist.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -52,6 +53,12 @@ bplist_append(bplist_t *bpl, const blkptr_t *bp) mutex_exit(&bpl->bpl_lock); } +/* + * To aid debugging, we keep the most recently removed entry. This way if + * we are in the callback, we can easily locate the entry. 
+ */ +static bplist_entry_t *bplist_iterate_last_removed; + void bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx) { @@ -59,6 +66,7 @@ bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx) mutex_enter(&bpl->bpl_lock); while ((bpe = list_head(&bpl->bpl_list))) { + bplist_iterate_last_removed = bpe; list_remove(&bpl->bpl_list, bpe); mutex_exit(&bpl->bpl_lock); func(arg, &bpe->bpe_blk, tx); diff --git a/module/zfs/bpobj.c b/module/zfs/bpobj.c index 1920da4408c8..4ba9f8002e2d 100644 --- a/module/zfs/bpobj.c +++ b/module/zfs/bpobj.c @@ -366,6 +366,7 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) { bpobj_t subbpo; uint64_t used, comp, uncomp, subsubobjs; + ASSERTV(dmu_object_info_t doi); ASSERT(bpo->bpo_havesubobj); ASSERT(bpo->bpo_havecomp); @@ -392,6 +393,9 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx); } + ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi)); + ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ); + mutex_enter(&bpo->bpo_lock); dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index faa6cc345b8f..d655d66212ce 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -64,7 +64,7 @@ static void __dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh, static int __dbuf_hold_impl(struct dbuf_hold_impl_data *dh); static void dbuf_destroy(dmu_buf_impl_t *db); -static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); +static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx); /* @@ -546,7 +546,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb) } else { ASSERT(db->db_blkid != DMU_BONUS_BLKID); ASSERT3P(db->db_buf, ==, NULL); - VERIFY(arc_buf_remove_ref(buf, db) == 1); + VERIFY(arc_buf_remove_ref(buf, db)); db->db_state = DB_UNCACHED; } cv_broadcast(&db->db_changed); @@ -875,10 +875,12 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx) continue; /* found a level 0 buffer in the range */ - if (dbuf_undirty(db, tx)) + mutex_enter(&db->db_mtx); + if (dbuf_undirty(db, tx)) { + /* mutex has been dropped and dbuf destroyed */ continue; + } - mutex_enter(&db->db_mtx); if (db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL || db->db_state == DB_EVICTING) { @@ -1005,7 +1007,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx) mutex_enter(&db->db_mtx); dbuf_set_data(db, buf); - VERIFY(arc_buf_remove_ref(obuf, db) == 1); + VERIFY(arc_buf_remove_ref(obuf, db)); db->db.db_size = size; if (db->db_level == 0) { @@ -1306,7 +1308,10 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) return (dr); } -static int +/* + * Return TRUE if this evicted the dbuf. + */ +static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) { dnode_t *dn; @@ -1315,18 +1320,17 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) ASSERT(txg != 0); ASSERT(db->db_blkid != DMU_BONUS_BLKID); + ASSERT0(db->db_level); + ASSERT(MUTEX_HELD(&db->db_mtx)); - mutex_enter(&db->db_mtx); /* * If this buffer is not dirty, we're done. 
*/ for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next) if (dr->dr_txg <= txg) break; - if (dr == NULL || dr->dr_txg < txg) { - mutex_exit(&db->db_mtx); - return (0); - } + if (dr == NULL || dr->dr_txg < txg) + return (B_FALSE); ASSERT(dr->dr_txg == txg); ASSERT(dr->dr_dbuf == db); @@ -1334,24 +1338,12 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) dn = DB_DNODE(db); /* - * If this buffer is currently held, we cannot undirty - * it, since one of the current holders may be in the - * middle of an update. Note that users of dbuf_undirty() - * should not place a hold on the dbuf before the call. - * Also note: we can get here with a spill block, so - * test for that similar to how dbuf_dirty does. + * Note: This code will probably work even if there are concurrent + * holders, but it is untested in that scenerio, as the ZPL and + * ztest have additional locking (the range locks) that prevents + * that type of concurrent access. */ - if (refcount_count(&db->db_holds) > db->db_dirtycnt) { - mutex_exit(&db->db_mtx); - /* Make sure we don't toss this buffer at sync phase */ - if (db->db_blkid != DMU_SPILL_BLKID) { - mutex_enter(&dn->dn_mtx); - dnode_clear_range(dn, db->db_blkid, 1, tx); - mutex_exit(&dn->dn_mtx); - } - DB_DNODE_EXIT(db); - return (0); - } + ASSERT3U(refcount_count(&db->db_holds), ==, db->db_dirtycnt); dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); @@ -1380,21 +1372,13 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) } DB_DNODE_EXIT(db); - if (db->db_level == 0) { - if (db->db_state != DB_NOFILL) { - dbuf_unoverride(dr); + if (db->db_state != DB_NOFILL) { + dbuf_unoverride(dr); - ASSERT(db->db_buf != NULL); - ASSERT(dr->dt.dl.dr_data != NULL); - if (dr->dt.dl.dr_data != db->db_buf) - VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, - db) == 1); - } - } else { ASSERT(db->db_buf != NULL); - ASSERT(list_head(&dr->dt.di.dr_children) == NULL); - mutex_destroy(&dr->dt.di.dr_mtx); - list_destroy(&dr->dt.di.dr_children); + ASSERT(dr->dt.dl.dr_data != NULL); + if (dr->dt.dl.dr_data != db->db_buf) + VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db)); } kmem_free(dr, sizeof (dbuf_dirty_record_t)); @@ -1406,13 +1390,12 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) ASSERT(db->db_state == DB_NOFILL || arc_released(buf)); dbuf_set_data(db, NULL); - VERIFY(arc_buf_remove_ref(buf, db) == 1); + VERIFY(arc_buf_remove_ref(buf, db)); dbuf_evict(db); - return (1); + return (B_TRUE); } - mutex_exit(&db->db_mtx); - return (0); + return (B_FALSE); } #pragma weak dmu_buf_will_dirty = dbuf_will_dirty @@ -1511,7 +1494,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) mutex_exit(&db->db_mtx); (void) dbuf_dirty(db, tx); bcopy(buf->b_data, db->db.db_data, db->db.db_size); - VERIFY(arc_buf_remove_ref(buf, db) == 1); + VERIFY(arc_buf_remove_ref(buf, db)); xuio_stat_wbuf_copied(); return; } @@ -1529,10 +1512,10 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) arc_release(db->db_buf, db); } dr->dt.dl.dr_data = buf; - VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1); + VERIFY(arc_buf_remove_ref(db->db_buf, db)); } else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) { arc_release(db->db_buf, db); - VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1); + VERIFY(arc_buf_remove_ref(db->db_buf, db)); } db->db_buf = NULL; } @@ -2168,10 +2151,10 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) * This dbuf has anonymous data associated with it. 
*/ dbuf_set_data(db, NULL); - VERIFY(arc_buf_remove_ref(buf, db) == 1); + VERIFY(arc_buf_remove_ref(buf, db)); dbuf_evict(db); } else { - VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0); + VERIFY(!arc_buf_remove_ref(db->db_buf, db)); /* * A dbuf will be eligible for eviction if either the @@ -2669,7 +2652,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) if (db->db_state != DB_NOFILL) { if (dr->dt.dl.dr_data != db->db_buf) VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, - db) == 1); + db)); else if (!arc_released(db->db_buf)) arc_set_callback(db->db_buf, dbuf_do_evict, db); } diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 0a903335655f..cbf4790b1799 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1382,7 +1382,7 @@ void dmu_return_arcbuf(arc_buf_t *buf) { arc_return_buf(buf, FTAG); - VERIFY(arc_buf_remove_ref(buf, FTAG) == 1); + VERIFY(arc_buf_remove_ref(buf, FTAG)); } /* diff --git a/module/zfs/dmu_diff.c b/module/zfs/dmu_diff.c index dc237780c0b0..2d1aaa4c449d 100644 --- a/module/zfs/dmu_diff.c +++ b/module/zfs/dmu_diff.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -155,51 +156,49 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, } int -dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct vnode *vp, offset_t *offp) +dmu_diff(const char *tosnap_name, const char *fromsnap_name, + struct vnode *vp, offset_t *offp) { struct diffarg da; - dsl_dataset_t *ds = tosnap->os_dsl_dataset; - dsl_dataset_t *fromds = fromsnap->os_dsl_dataset; - dsl_dataset_t *findds; - dsl_dataset_t *relds; - int err = 0; - - /* make certain we are looking at snapshots */ - if (!dsl_dataset_is_snapshot(ds) || !dsl_dataset_is_snapshot(fromds)) + dsl_dataset_t *fromsnap; + dsl_dataset_t *tosnap; + dsl_pool_t *dp; + int error; + uint64_t fromtxg; + + if (strchr(tosnap_name, '@') == NULL || + strchr(fromsnap_name, '@') == NULL) return (EINVAL); - /* fromsnap must be earlier and from the same lineage as tosnap */ - if (fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg) - return (EXDEV); - - relds = NULL; - findds = ds; - - while (fromds->ds_dir != findds->ds_dir) { - dsl_pool_t *dp = ds->ds_dir->dd_pool; - - if (!dsl_dir_is_clone(findds->ds_dir)) { - if (relds) - dsl_dataset_rele(relds, FTAG); - return (EXDEV); - } - - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dataset_hold_obj(dp, - findds->ds_dir->dd_phys->dd_origin_obj, FTAG, &findds); - rw_exit(&dp->dp_config_rwlock); + error = dsl_pool_hold(tosnap_name, FTAG, &dp); + if (error != 0) + return (error); - if (relds) - dsl_dataset_rele(relds, FTAG); + error = dsl_dataset_hold(dp, tosnap_name, FTAG, &tosnap); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + return (error); + } - if (err) - return (EXDEV); + error = dsl_dataset_hold(dp, fromsnap_name, FTAG, &fromsnap); + if (error != 0) { + dsl_dataset_rele(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); + return (error); + } - relds = findds; + if (!dsl_dataset_is_before(tosnap, fromsnap)) { + dsl_dataset_rele(fromsnap, FTAG); + dsl_dataset_rele(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); + return (EXDEV); } - if (relds) - dsl_dataset_rele(relds, FTAG); + fromtxg = fromsnap->ds_phys->ds_creation_txg; + dsl_dataset_rele(fromsnap, FTAG); + + dsl_dataset_long_hold(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); da.da_vp = vp; da.da_offp = offp; @@ -207,15 +206,18 @@ dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct vnode *vp, offset_t *offp) 
da.da_ddr.ddr_first = da.da_ddr.ddr_last = 0; da.da_err = 0; - err = traverse_dataset(ds, fromds->ds_phys->ds_creation_txg, + error = traverse_dataset(tosnap, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, diff_cb, &da); - if (err) { - da.da_err = err; + if (error != 0) { + da.da_err = error; } else { /* we set the da.da_err we return as side-effect */ (void) write_record(&da); } + dsl_dataset_long_rele(tosnap, FTAG); + dsl_dataset_rele(tosnap, FTAG); + return (da.da_err); } diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 0f07a4cc95d2..97a224b91187 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -45,6 +45,7 @@ #include #include #include +#include /* * Needed to close a window in dnode_move() that allows the objset to be freed @@ -283,7 +284,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, err = arc_read(NULL, spa, os->os_rootbp, arc_getbuf_func, &os->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); - if (err) { + if (err != 0) { kmem_free(os, sizeof (objset_t)); /* convert checksum errors into IO errors */ if (err == ECKSUM) @@ -323,34 +324,49 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, * checksum/compression/copies. */ if (ds) { - err = dsl_prop_register(ds, "primarycache", + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE), primary_cache_changed_cb, os); - if (err == 0) - err = dsl_prop_register(ds, "secondarycache", + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE), secondary_cache_changed_cb, os); + } if (!dsl_dataset_is_snapshot(ds)) { - if (err == 0) - err = dsl_prop_register(ds, "checksum", + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum_changed_cb, os); - if (err == 0) - err = dsl_prop_register(ds, "compression", + } + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), compression_changed_cb, os); - if (err == 0) - err = dsl_prop_register(ds, "copies", + } + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_COPIES), copies_changed_cb, os); - if (err == 0) - err = dsl_prop_register(ds, "dedup", + } + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_DEDUP), dedup_changed_cb, os); - if (err == 0) - err = dsl_prop_register(ds, "logbias", + } + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_LOGBIAS), logbias_changed_cb, os); - if (err == 0) - err = dsl_prop_register(ds, "sync", + } + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_SYNC), sync_changed_cb, os); + } } - if (err) { + if (err != 0) { VERIFY(arc_buf_remove_ref(os->os_phys_buf, - &os->os_phys_buf) == 1); + &os->os_phys_buf)); kmem_free(os, sizeof (objset_t)); return (err); } @@ -428,44 +444,66 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp) return (err); } -/* called from zpl */ +/* + * Holds the pool while the objset is held. Therefore only one objset + * can be held at a time. 
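Given the comment above, a caller must now treat a dmu_objset_hold() as covering both the dataset and its pool, and pair it with a single dmu_objset_rele(). A minimal kernel-side sketch; the objset_type_example() helper is invented for illustration.

static int
objset_type_example(const char *name, dmu_objset_type_t *typep)
{
        objset_t *os;
        int err;

        /*
         * dmu_objset_hold() now also holds the dsl_pool, and
         * dmu_objset_rele() drops both holds, so keep the hold
         * short-lived and do not nest a second dmu_objset_hold().
         */
        err = dmu_objset_hold(name, FTAG, &os);
        if (err != 0)
                return (err);
        *typep = dmu_objset_type(os);
        dmu_objset_rele(os, FTAG);
        return (0);
}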
+ */ int dmu_objset_hold(const char *name, void *tag, objset_t **osp) { + dsl_pool_t *dp; dsl_dataset_t *ds; int err; - err = dsl_dataset_hold(name, tag, &ds); - if (err) + err = dsl_pool_hold(name, tag, &dp); + if (err != 0) + return (err); + err = dsl_dataset_hold(dp, name, tag, &ds); + if (err != 0) { + dsl_pool_rele(dp, tag); return (err); + } err = dmu_objset_from_ds(ds, osp); - if (err) + if (err != 0) { dsl_dataset_rele(ds, tag); + dsl_pool_rele(dp, tag); + } return (err); } -/* called from zpl */ +/* + * dsl_pool must not be held when this is called. + * Upon successful return, there will be a longhold on the dataset, + * and the dsl_pool will not be held. + */ int dmu_objset_own(const char *name, dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp) { + dsl_pool_t *dp; dsl_dataset_t *ds; int err; - err = dsl_dataset_own(name, B_FALSE, tag, &ds); - if (err) + err = dsl_pool_hold(name, FTAG, &dp); + if (err != 0) + return (err); + err = dsl_dataset_own(dp, name, tag, &ds); + if (err != 0) { + dsl_pool_rele(dp, FTAG); return (err); + } err = dmu_objset_from_ds(ds, osp); - if (err) { + dsl_pool_rele(dp, FTAG); + if (err != 0) { dsl_dataset_disown(ds, tag); } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) { - dmu_objset_disown(*osp, tag); + dsl_dataset_disown(ds, tag); return (EINVAL); } else if (!readonly && dsl_dataset_is_snapshot(ds)) { - dmu_objset_disown(*osp, tag); + dsl_dataset_disown(ds, tag); return (EROFS); } return (err); @@ -474,7 +512,9 @@ dmu_objset_own(const char *name, dmu_objset_type_t type, void dmu_objset_rele(objset_t *os, void *tag) { + dsl_pool_t *dp = dmu_objset_pool(os); dsl_dataset_rele(os->os_dsl_dataset, tag); + dsl_pool_rele(dp, tag); } void @@ -483,7 +523,7 @@ dmu_objset_disown(objset_t *os, void *tag) dsl_dataset_disown(os->os_dsl_dataset, tag); } -int +void dmu_objset_evict_dbufs(objset_t *os) { dnode_t *dn; @@ -518,9 +558,7 @@ dmu_objset_evict_dbufs(objset_t *os) mutex_enter(&os->os_lock); dn = next_dn; } - dn = list_head(&os->os_dnodes); mutex_exit(&os->os_lock); - return (dn != DMU_META_DNODE(os)); } void @@ -535,33 +573,37 @@ dmu_objset_evict(objset_t *os) if (ds) { if (!dsl_dataset_is_snapshot(ds)) { - VERIFY(0 == dsl_prop_unregister(ds, "checksum", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum_changed_cb, os)); - VERIFY(0 == dsl_prop_unregister(ds, "compression", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), compression_changed_cb, os)); - VERIFY(0 == dsl_prop_unregister(ds, "copies", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_COPIES), copies_changed_cb, os)); - VERIFY(0 == dsl_prop_unregister(ds, "dedup", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_DEDUP), dedup_changed_cb, os)); - VERIFY(0 == dsl_prop_unregister(ds, "logbias", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_LOGBIAS), logbias_changed_cb, os)); - VERIFY(0 == dsl_prop_unregister(ds, "sync", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_SYNC), sync_changed_cb, os)); } - VERIFY(0 == dsl_prop_unregister(ds, "primarycache", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE), primary_cache_changed_cb, os)); - VERIFY(0 == dsl_prop_unregister(ds, "secondarycache", + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE), secondary_cache_changed_cb, os)); } if (os->os_sa) sa_tear_down(os); - /* - * We should need only a single pass over the dnode list, since - * nothing can be 
added to the list at this point. - */ - (void) dmu_objset_evict_dbufs(os); + dmu_objset_evict_dbufs(os); dnode_special_close(&os->os_meta_dnode); if (DMU_USERUSED_DNODE(os)) { @@ -572,7 +614,7 @@ dmu_objset_evict(objset_t *os) ASSERT3P(list_head(&os->os_dnodes), ==, NULL); - VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1); + VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf)); /* * This is a barrier to prevent the objset from going away in @@ -604,10 +646,11 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, dnode_t *mdn; ASSERT(dmu_tx_is_syncing(tx)); + if (ds != NULL) - VERIFY(0 == dmu_objset_from_ds(ds, &os)); + VERIFY0(dmu_objset_from_ds(ds, &os)); else - VERIFY(0 == dmu_objset_open_impl(spa, NULL, bp, &os)); + VERIFY0(dmu_objset_open_impl(spa, NULL, bp, &os)); mdn = DMU_META_DNODE(os); @@ -655,361 +698,181 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, return (os); } -struct oscarg { - void (*userfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); - void *userarg; - dsl_dataset_t *clone_origin; - const char *lastname; - dmu_objset_type_t type; - uint64_t flags; - cred_t *cr; -}; +typedef struct dmu_objset_create_arg { + const char *doca_name; + cred_t *doca_cred; + void (*doca_userfunc)(objset_t *os, void *arg, + cred_t *cr, dmu_tx_t *tx); + void *doca_userarg; + dmu_objset_type_t doca_type; + uint64_t doca_flags; +} dmu_objset_create_arg_t; /*ARGSUSED*/ static int -dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx) +dmu_objset_create_check(void *arg, dmu_tx_t *tx) { - dsl_dir_t *dd = arg1; - struct oscarg *oa = arg2; - objset_t *mos = dd->dd_pool->dp_meta_objset; - int err; - uint64_t ddobj; - - err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, - oa->lastname, sizeof (uint64_t), 1, &ddobj); - if (err != ENOENT) - return (err ? err : EEXIST); + dmu_objset_create_arg_t *doca = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dir_t *pdd; + const char *tail; + int error; - if (oa->clone_origin != NULL) { - /* You can't clone across pools. */ - if (oa->clone_origin->ds_dir->dd_pool != dd->dd_pool) - return (EXDEV); + if (strchr(doca->doca_name, '@') != NULL) + return (EINVAL); - /* You can only clone snapshots, not the head datasets. 
*/ - if (!dsl_dataset_is_snapshot(oa->clone_origin)) - return (EINVAL); + error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail); + if (error != 0) + return (error); + if (tail == NULL) { + dsl_dir_rele(pdd, FTAG); + return (EEXIST); } + dsl_dir_rele(pdd, FTAG); return (0); } static void -dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dmu_objset_create_sync(void *arg, dmu_tx_t *tx) { - dsl_dir_t *dd = arg1; - spa_t *spa = dd->dd_pool->dp_spa; - struct oscarg *oa = arg2; - uint64_t obj; + dmu_objset_create_arg_t *doca = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dir_t *pdd; + const char *tail; dsl_dataset_t *ds; + uint64_t obj; blkptr_t *bp; + objset_t *os; - ASSERT(dmu_tx_is_syncing(tx)); + VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail)); - obj = dsl_dataset_create_sync(dd, oa->lastname, - oa->clone_origin, oa->flags, oa->cr, tx); + obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags, + doca->doca_cred, tx); - VERIFY3U(0, ==, dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds)); + VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); bp = dsl_dataset_get_blkptr(ds); - if (BP_IS_HOLE(bp)) { - objset_t *os = - dmu_objset_create_impl(spa, ds, bp, oa->type, tx); + os = dmu_objset_create_impl(pdd->dd_pool->dp_spa, + ds, bp, doca->doca_type, tx); - if (oa->userfunc) - oa->userfunc(os, oa->userarg, oa->cr, tx); + if (doca->doca_userfunc != NULL) { + doca->doca_userfunc(os, doca->doca_userarg, + doca->doca_cred, tx); } - if (oa->clone_origin == NULL) { - spa_history_log_internal_ds(ds, "create", tx, ""); - } else { - char namebuf[MAXNAMELEN]; - dsl_dataset_name(oa->clone_origin, namebuf); - spa_history_log_internal_ds(ds, "clone", tx, - "origin=%s (%llu)", namebuf, oa->clone_origin->ds_object); - } + spa_history_log_internal_ds(ds, "create", tx, ""); dsl_dataset_rele(ds, FTAG); + dsl_dir_rele(pdd, FTAG); } int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg) { - dsl_dir_t *pdd; - const char *tail; - int err = 0; - struct oscarg oa = { 0 }; - - ASSERT(strchr(name, '@') == NULL); - err = dsl_dir_open(name, FTAG, &pdd, &tail); - if (err) - return (err); - if (tail == NULL) { - dsl_dir_close(pdd, FTAG); - return (EEXIST); - } + dmu_objset_create_arg_t doca; - oa.userfunc = func; - oa.userarg = arg; - oa.lastname = tail; - oa.type = type; - oa.flags = flags; - oa.cr = CRED(); + doca.doca_name = name; + doca.doca_cred = CRED(); + doca.doca_flags = flags; + doca.doca_userfunc = func; + doca.doca_userarg = arg; + doca.doca_type = type; - err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, - dmu_objset_create_sync, pdd, &oa, 5); - dsl_dir_close(pdd, FTAG); - return (err); + return (dsl_sync_task(name, + dmu_objset_create_check, dmu_objset_create_sync, &doca, 5)); } -int -dmu_objset_clone(const char *name, dsl_dataset_t *clone_origin, uint64_t flags) +typedef struct dmu_objset_clone_arg { + const char *doca_clone; + const char *doca_origin; + cred_t *doca_cred; +} dmu_objset_clone_arg_t; + +/*ARGSUSED*/ +static int +dmu_objset_clone_check(void *arg, dmu_tx_t *tx) { + dmu_objset_clone_arg_t *doca = arg; dsl_dir_t *pdd; const char *tail; - int err = 0; - struct oscarg oa = { 0 }; + int error; + dsl_dataset_t *origin; + dsl_pool_t *dp = dmu_tx_pool(tx); - ASSERT(strchr(name, '@') == NULL); - err = dsl_dir_open(name, FTAG, &pdd, &tail); - if (err) - return (err); + if (strchr(doca->doca_clone, '@') != NULL) + return (EINVAL); + + error = 
dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail); + if (error != 0) + return (error); if (tail == NULL) { - dsl_dir_close(pdd, FTAG); + dsl_dir_rele(pdd, FTAG); return (EEXIST); } - - oa.lastname = tail; - oa.clone_origin = clone_origin; - oa.flags = flags; - oa.cr = CRED(); - - err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, - dmu_objset_create_sync, pdd, &oa, 5); - dsl_dir_close(pdd, FTAG); - return (err); -} - -int -dmu_objset_destroy(const char *name, boolean_t defer) -{ - dsl_dataset_t *ds; - int error; - - error = dsl_dataset_own(name, B_TRUE, FTAG, &ds); - if (error == 0) { - error = dsl_dataset_destroy(ds, FTAG, defer); - /* dsl_dataset_destroy() closes the ds. */ + /* You can't clone across pools. */ + if (pdd->dd_pool != dp) { + dsl_dir_rele(pdd, FTAG); + return (EXDEV); } + dsl_dir_rele(pdd, FTAG); - return (error); -} - -typedef struct snapallarg { - dsl_sync_task_group_t *saa_dstg; - boolean_t saa_needsuspend; - nvlist_t *saa_props; - - /* the following are used only if 'temporary' is set: */ - boolean_t saa_temporary; - const char *saa_htag; - struct dsl_ds_holdarg *saa_ha; - dsl_dataset_t *saa_newds; -} snapallarg_t; - -typedef struct snaponearg { - const char *soa_longname; /* long snap name */ - const char *soa_snapname; /* short snap name */ - snapallarg_t *soa_saa; -} snaponearg_t; - -static int -snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) -{ - objset_t *os = arg1; - snaponearg_t *soa = arg2; - snapallarg_t *saa = soa->soa_saa; - int error; - - /* The props have already been checked by zfs_check_userprops(). */ - - error = dsl_dataset_snapshot_check(os->os_dsl_dataset, - soa->soa_snapname, tx); - if (error) + error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin); + if (error != 0) return (error); - if (saa->saa_temporary) { - /* - * Ideally we would just call - * dsl_dataset_user_hold_check() and - * dsl_dataset_destroy_check() here. However the - * dataset we want to hold and destroy is the snapshot - * that we just confirmed we can create, but it won't - * exist until after these checks are run. Do any - * checks we can here and if more checks are added to - * those routines in the future, similar checks may be - * necessary here. - */ - if (spa_version(os->os_spa) < SPA_VERSION_USERREFS) - return (ENOTSUP); - /* - * Not checking number of tags because the tag will be - * unique, as it will be the only tag. - */ - if (strlen(saa->saa_htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) - return (E2BIG); - - saa->saa_ha = kmem_alloc(sizeof (struct dsl_ds_holdarg), - KM_PUSHPAGE); - saa->saa_ha->temphold = B_TRUE; - saa->saa_ha->htag = saa->saa_htag; + /* You can't clone across pools. */ + if (origin->ds_dir->dd_pool != dp) { + dsl_dataset_rele(origin, FTAG); + return (EXDEV); } - return (error); -} -static void -snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) -{ - objset_t *os = arg1; - dsl_dataset_t *ds = os->os_dsl_dataset; - snaponearg_t *soa = arg2; - snapallarg_t *saa = soa->soa_saa; - - dsl_dataset_snapshot_sync(ds, soa->soa_snapname, tx); - - if (saa->saa_props != NULL) { - dsl_props_arg_t pa; - pa.pa_props = saa->saa_props; - pa.pa_source = ZPROP_SRC_LOCAL; - dsl_props_set_sync(ds->ds_prev, &pa, tx); + /* You can only clone snapshots, not the head datasets. 
*/ + if (!dsl_dataset_is_snapshot(origin)) { + dsl_dataset_rele(origin, FTAG); + return (EINVAL); } + dsl_dataset_rele(origin, FTAG); - if (saa->saa_temporary) { - struct dsl_ds_destroyarg da; - - dsl_dataset_user_hold_sync(ds->ds_prev, saa->saa_ha, tx); - kmem_free(saa->saa_ha, sizeof (struct dsl_ds_holdarg)); - saa->saa_ha = NULL; - saa->saa_newds = ds->ds_prev; - - da.ds = ds->ds_prev; - da.defer = B_TRUE; - dsl_dataset_destroy_sync(&da, FTAG, tx); - } + return (0); } -static int -snapshot_one_impl(const char *snapname, void *arg) +static void +dmu_objset_clone_sync(void *arg, dmu_tx_t *tx) { - char *fsname; - snapallarg_t *saa = arg; - snaponearg_t *soa; - objset_t *os; - int err; - - fsname = kmem_zalloc(MAXPATHLEN, KM_PUSHPAGE); - (void) strlcpy(fsname, snapname, MAXPATHLEN); - strchr(fsname, '@')[0] = '\0'; - - err = dmu_objset_hold(fsname, saa, &os); - kmem_free(fsname, MAXPATHLEN); - if (err != 0) - return (err); - - /* - * If the objset is in an inconsistent state (eg, in the process - * of being destroyed), don't snapshot it. - */ - if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { - dmu_objset_rele(os, saa); - return (EBUSY); - } - - if (saa->saa_needsuspend) { - err = zil_suspend(dmu_objset_zil(os)); - if (err) { - dmu_objset_rele(os, saa); - return (err); - } - } + dmu_objset_clone_arg_t *doca = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dir_t *pdd; + const char *tail; + dsl_dataset_t *origin, *ds; + uint64_t obj; + char namebuf[MAXNAMELEN]; - soa = kmem_zalloc(sizeof (*soa), KM_PUSHPAGE); - soa->soa_saa = saa; - soa->soa_longname = snapname; - soa->soa_snapname = strchr(snapname, '@') + 1; + VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail)); + VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin)); - dsl_sync_task_create(saa->saa_dstg, snapshot_check, snapshot_sync, - os, soa, 3); + obj = dsl_dataset_create_sync(pdd, tail, origin, 0, + doca->doca_cred, tx); - return (0); + VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); + dsl_dataset_name(origin, namebuf); + spa_history_log_internal_ds(ds, "clone", tx, + "origin=%s (%llu)", namebuf, origin->ds_object); + dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele(origin, FTAG); + dsl_dir_rele(pdd, FTAG); } -/* - * The snapshots must all be in the same pool. - */ int -dmu_objset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) +dmu_objset_clone(const char *clone, const char *origin) { - dsl_sync_task_t *dst; - snapallarg_t saa = { 0 }; - spa_t *spa; - int rv = 0; - int err; - nvpair_t *pair; - - pair = nvlist_next_nvpair(snaps, NULL); - if (pair == NULL) - return (0); - - err = spa_open(nvpair_name(pair), &spa, FTAG); - if (err) - return (err); - saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - saa.saa_props = props; - saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); - - for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; - pair = nvlist_next_nvpair(snaps, pair)) { - err = snapshot_one_impl(nvpair_name(pair), &saa); - if (err != 0) { - if (errors != NULL) { - fnvlist_add_int32(errors, - nvpair_name(pair), err); - } - rv = err; - } - } + dmu_objset_clone_arg_t doca; - /* - * If any call to snapshot_one_impl() failed, don't execute the - * sync task. The error handling code below will clean up the - * snaponearg_t from any successful calls to - * snapshot_one_impl(). 
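For reference, a hedged sketch of calling the reworked dmu_objset_clone() shown above, which now takes plain names and routes dmu_objset_clone_check()/dmu_objset_clone_sync() through dsl_sync_task(); the "tank/clone" and "tank/fs@snap" names and the clone_example() helper are invented.

static int
clone_example(void)
{
        /*
         * The clone must not already exist and the origin must be a
         * snapshot in the same pool; dmu_objset_clone_check() enforces
         * this before dmu_objset_clone_sync() runs in syncing context.
         */
        return (dmu_objset_clone("tank/clone", "tank/fs@snap"));
}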
- */ - if (rv == 0) - err = dsl_sync_task_group_wait(saa.saa_dstg); - if (err != 0) - rv = err; - - for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst; - dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) { - objset_t *os = dst->dst_arg1; - snaponearg_t *soa = dst->dst_arg2; - if (dst->dst_err != 0) { - if (errors != NULL) { - fnvlist_add_int32(errors, - soa->soa_longname, dst->dst_err); - } - rv = dst->dst_err; - } - - if (saa.saa_needsuspend) - zil_resume(dmu_objset_zil(os)); - dmu_objset_rele(os, &saa); - kmem_free(soa, sizeof (*soa)); - } + doca.doca_clone = clone; + doca.doca_origin = origin; + doca.doca_cred = CRED(); - dsl_sync_task_group_destroy(saa.saa_dstg); - spa_close(spa, FTAG); - return (rv); + return (dsl_sync_task(clone, + dmu_objset_clone_check, dmu_objset_clone_sync, &doca, 5)); } int @@ -1020,59 +883,12 @@ dmu_objset_snapshot_one(const char *fsname, const char *snapname) nvlist_t *snaps = fnvlist_alloc(); fnvlist_add_boolean(snaps, longsnap); - err = dmu_objset_snapshot(snaps, NULL, NULL); - fnvlist_free(snaps); strfree(longsnap); + err = dsl_dataset_snapshot(snaps, NULL, NULL); + fnvlist_free(snaps); return (err); } -int -dmu_objset_snapshot_tmp(const char *snapname, const char *tag, int cleanup_fd) -{ - dsl_sync_task_t *dst; - snapallarg_t saa = { 0 }; - spa_t *spa; - minor_t minor; - int err; - - err = spa_open(snapname, &spa, FTAG); - if (err) - return (err); - saa.saa_dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - saa.saa_htag = tag; - saa.saa_needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); - saa.saa_temporary = B_TRUE; - - if (cleanup_fd < 0) { - spa_close(spa, FTAG); - return (EINVAL); - } - if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) { - spa_close(spa, FTAG); - return (err); - } - - err = snapshot_one_impl(snapname, &saa); - - if (err == 0) - err = dsl_sync_task_group_wait(saa.saa_dstg); - - for (dst = list_head(&saa.saa_dstg->dstg_tasks); dst; - dst = list_next(&saa.saa_dstg->dstg_tasks, dst)) { - objset_t *os = dst->dst_arg1; - dsl_register_onexit_hold_cleanup(saa.saa_newds, tag, minor); - if (saa.saa_needsuspend) - zil_resume(dmu_objset_zil(os)); - dmu_objset_rele(os, &saa); - } - - zfs_onexit_fd_rele(cleanup_fd); - dsl_sync_task_group_destroy(saa.saa_dstg); - spa_close(spa, FTAG); - return (err); -} - - static void dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx) { @@ -1110,9 +926,9 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) objset_t *os = arg; dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; - ASSERT(bp == os->os_rootbp); - ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET); - ASSERT(BP_GET_LEVEL(bp) == 0); + ASSERT3P(bp, ==, os->os_rootbp); + ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); + ASSERT0(BP_GET_LEVEL(bp)); /* * Update rootbp fill count: it should be the number of objects @@ -1220,7 +1036,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff]; while ((dr = list_head(list))) { - ASSERT(dr->dr_dbuf->db_level == 0); + ASSERT0(dr->dr_dbuf->db_level); list_remove(list, dr); if (dr->dr_zio) zio_nowait(dr->dr_zio); @@ -1514,12 +1330,12 @@ dmu_objset_userspace_upgrade(objset_t *os) return (EINTR); objerr = dmu_bonus_hold(os, obj, FTAG, &db); - if (objerr) + if (objerr != 0) continue; tx = dmu_tx_create(os); dmu_tx_hold_bonus(tx, obj); objerr = dmu_tx_assign(tx, TXG_WAIT); - if (objerr) { + if (objerr != 0) { dmu_tx_abort(tx); continue; } @@ -1602,6 +1418,8 @@ dmu_snapshot_list_next(objset_t *os, int namelen, char *name, zap_cursor_t 
cursor; zap_attribute_t attr; + ASSERT(dsl_pool_config_held(dmu_objset_pool(os))); + if (ds->ds_phys->ds_snapnames_zapobj == 0) return (ENOENT); @@ -1674,42 +1492,122 @@ dmu_dir_list_next(objset_t *os, int namelen, char *name, return (0); } -struct findarg { - int (*func)(const char *, void *); - void *arg; -}; - -/* ARGSUSED */ -static int -findfunc(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) -{ - struct findarg *fa = arg; - return (fa->func(dsname, fa->arg)); -} - /* - * Find all objsets under name, and for each, call 'func(child_name, arg)'. - * Perhaps change all callers to use dmu_objset_find_spa()? + * Find objsets under and including ddobj, call func(ds) on each. */ int -dmu_objset_find(char *name, int func(const char *, void *), void *arg, - int flags) +dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj, + int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags) { - struct findarg fa; - fa.func = func; - fa.arg = arg; - return (dmu_objset_find_spa(NULL, name, findfunc, &fa, flags)); + dsl_dir_t *dd; + dsl_dataset_t *ds; + zap_cursor_t zc; + zap_attribute_t *attr; + uint64_t thisobj; + int err; + + ASSERT(dsl_pool_config_held(dp)); + + err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd); + if (err != 0) + return (err); + + /* Don't visit hidden ($MOS & $ORIGIN) objsets. */ + if (dd->dd_myname[0] == '$') { + dsl_dir_rele(dd, FTAG); + return (0); + } + + thisobj = dd->dd_phys->dd_head_dataset_obj; + attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + + /* + * Iterate over all children. + */ + if (flags & DS_FIND_CHILDREN) { + for (zap_cursor_init(&zc, dp->dp_meta_objset, + dd->dd_phys->dd_child_dir_zapobj); + zap_cursor_retrieve(&zc, attr) == 0; + (void) zap_cursor_advance(&zc)) { + ASSERT3U(attr->za_integer_length, ==, + sizeof (uint64_t)); + ASSERT3U(attr->za_num_integers, ==, 1); + + err = dmu_objset_find_dp(dp, attr->za_first_integer, + func, arg, flags); + if (err != 0) + break; + } + zap_cursor_fini(&zc); + + if (err != 0) { + dsl_dir_rele(dd, FTAG); + kmem_free(attr, sizeof (zap_attribute_t)); + return (err); + } + } + + /* + * Iterate over all snapshots. + */ + if (flags & DS_FIND_SNAPSHOTS) { + dsl_dataset_t *ds; + err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); + + if (err == 0) { + uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; + dsl_dataset_rele(ds, FTAG); + + for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj); + zap_cursor_retrieve(&zc, attr) == 0; + (void) zap_cursor_advance(&zc)) { + ASSERT3U(attr->za_integer_length, ==, + sizeof (uint64_t)); + ASSERT3U(attr->za_num_integers, ==, 1); + + err = dsl_dataset_hold_obj(dp, + attr->za_first_integer, FTAG, &ds); + if (err != 0) + break; + err = func(dp, ds, arg); + dsl_dataset_rele(ds, FTAG); + if (err != 0) + break; + } + zap_cursor_fini(&zc); + } + } + + dsl_dir_rele(dd, FTAG); + kmem_free(attr, sizeof (zap_attribute_t)); + + if (err != 0) + return (err); + + /* + * Apply to self. + */ + err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); + if (err != 0) + return (err); + err = func(dp, ds, arg); + dsl_dataset_rele(ds, FTAG); + return (err); } /* - * Find all objsets under name, call func on each + * Find all objsets under name, and for each, call 'func(child_name, arg)'. + * The dp_config_rwlock must not be held when this is called, and it + * will not be held when the callback is called. + * Therefore this function should only be used when the pool is not changing + * (e.g. in syncing context), or the callback can deal with the possible races. 
*/ -int -dmu_objset_find_spa(spa_t *spa, const char *name, - int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags) +static int +dmu_objset_find_impl(spa_t *spa, const char *name, + int func(const char *, void *), void *arg, int flags) { dsl_dir_t *dd; - dsl_pool_t *dp; + dsl_pool_t *dp = spa_get_dsl(spa); dsl_dataset_t *ds; zap_cursor_t zc; zap_attribute_t *attr; @@ -1717,21 +1615,23 @@ dmu_objset_find_spa(spa_t *spa, const char *name, uint64_t thisobj; int err; - if (name == NULL) - name = spa_name(spa); - err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL); - if (err) + dsl_pool_config_enter(dp, FTAG); + + err = dsl_dir_hold(dp, name, FTAG, &dd, NULL); + if (err != 0) { + dsl_pool_config_exit(dp, FTAG); return (err); + } /* Don't visit hidden ($MOS & $ORIGIN) objsets. */ if (dd->dd_myname[0] == '$') { - dsl_dir_close(dd, FTAG); + dsl_dir_rele(dd, FTAG); + dsl_pool_config_exit(dp, FTAG); return (0); } thisobj = dd->dd_phys->dd_head_dataset_obj; attr = kmem_alloc(sizeof (zap_attribute_t), KM_PUSHPAGE); - dp = dd->dd_pool; /* * Iterate over all children. @@ -1741,19 +1641,24 @@ dmu_objset_find_spa(spa_t *spa, const char *name, dd->dd_phys->dd_child_dir_zapobj); zap_cursor_retrieve(&zc, attr) == 0; (void) zap_cursor_advance(&zc)) { - ASSERT(attr->za_integer_length == sizeof (uint64_t)); - ASSERT(attr->za_num_integers == 1); + ASSERT3U(attr->za_integer_length, ==, + sizeof (uint64_t)); + ASSERT3U(attr->za_num_integers, ==, 1); child = kmem_asprintf("%s/%s", name, attr->za_name); - err = dmu_objset_find_spa(spa, child, func, arg, flags); + dsl_pool_config_exit(dp, FTAG); + err = dmu_objset_find_impl(spa, child, + func, arg, flags); + dsl_pool_config_enter(dp, FTAG); strfree(child); - if (err) + if (err != 0) break; } zap_cursor_fini(&zc); - if (err) { - dsl_dir_close(dd, FTAG); + if (err != 0) { + dsl_dir_rele(dd, FTAG); + dsl_pool_config_exit(dp, FTAG); kmem_free(attr, sizeof (zap_attribute_t)); return (err); } @@ -1763,11 +1668,7 @@ dmu_objset_find_spa(spa_t *spa, const char *name, * Iterate over all snapshots. */ if (flags & DS_FIND_SNAPSHOTS) { - if (!dsl_pool_sync_context(dp)) - rw_enter(&dp->dp_config_rwlock, RW_READER); err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); - if (!dsl_pool_sync_context(dp)) - rw_exit(&dp->dp_config_rwlock); if (err == 0) { uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; @@ -1776,64 +1677,50 @@ dmu_objset_find_spa(spa_t *spa, const char *name, for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj); zap_cursor_retrieve(&zc, attr) == 0; (void) zap_cursor_advance(&zc)) { - ASSERT(attr->za_integer_length == + ASSERT3U(attr->za_integer_length, ==, sizeof (uint64_t)); - ASSERT(attr->za_num_integers == 1); + ASSERT3U(attr->za_num_integers, ==, 1); child = kmem_asprintf("%s@%s", name, attr->za_name); - err = func(spa, attr->za_first_integer, - child, arg); + dsl_pool_config_exit(dp, FTAG); + err = func(child, arg); + dsl_pool_config_enter(dp, FTAG); strfree(child); - if (err) + if (err != 0) break; } zap_cursor_fini(&zc); } } - dsl_dir_close(dd, FTAG); + dsl_dir_rele(dd, FTAG); kmem_free(attr, sizeof (zap_attribute_t)); + dsl_pool_config_exit(dp, FTAG); - if (err) + if (err != 0) return (err); - /* - * Apply to self if appropriate. - */ - err = func(spa, thisobj, name, arg); - return (err); + /* Apply to self. */ + return (func(name, arg)); } -/* ARGSUSED */ +/* + * See comment above dmu_objset_find_impl(). 
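A sketch of a caller of the name-based dmu_objset_find() defined just below, with an invented counting callback. Per the comment above, the callback must tolerate the pool changing underneath it, since dp_config_rwlock is dropped around each invocation.

static int
count_cb(const char *name, void *arg)
{
        uint64_t *countp = arg;

        (*countp)++;
        return (0);             /* a nonzero return stops the walk */
}

static uint64_t
count_datasets(char *poolname)
{
        uint64_t count = 0;

        (void) dmu_objset_find(poolname, count_cb, &count,
            DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
        return (count);
}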
+ */ int -dmu_objset_prefetch(const char *name, void *arg) +dmu_objset_find(char *name, int func(const char *, void *), void *arg, + int flags) { - dsl_dataset_t *ds; - - if (dsl_dataset_hold(name, FTAG, &ds)) - return (0); - - if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) { - mutex_enter(&ds->ds_opening_lock); - if (ds->ds_objset == NULL) { - uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; - zbookmark_t zb; - - SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT, - ZB_ROOT_LEVEL, ZB_ROOT_BLKID); - - (void) arc_read(NULL, dsl_dataset_get_spa(ds), - &ds->ds_phys->ds_bp, NULL, NULL, - ZIO_PRIORITY_ASYNC_READ, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, - &aflags, &zb); - } - mutex_exit(&ds->ds_opening_lock); - } + spa_t *spa; + int error; - dsl_dataset_rele(ds, FTAG); - return (0); + error = spa_open(name, &spa, FTAG); + if (error != 0) + return (error); + error = dmu_objset_find_impl(spa, name, func, arg, flags); + spa_close(spa, FTAG); + return (error); } void @@ -1850,6 +1737,22 @@ dmu_objset_get_user(objset_t *os) return (os->os_user_ptr); } +/* + * Determine name of filesystem, given name of snapshot. + * buf must be at least MAXNAMELEN bytes + */ +int +dmu_fsname(const char *snapname, char *buf) +{ + char *atp = strchr(snapname, '@'); + if (atp == NULL) + return (EINVAL); + if (atp - snapname >= MAXNAMELEN) + return (ENAMETOOLONG); + (void) strlcpy(buf, snapname, atp - snapname + 1); + return (0); +} + #if defined(_KERNEL) && defined(HAVE_SPL) EXPORT_SYMBOL(dmu_objset_zil); EXPORT_SYMBOL(dmu_objset_pool); @@ -1863,16 +1766,12 @@ EXPORT_SYMBOL(dmu_objset_disown); EXPORT_SYMBOL(dmu_objset_from_ds); EXPORT_SYMBOL(dmu_objset_create); EXPORT_SYMBOL(dmu_objset_clone); -EXPORT_SYMBOL(dmu_objset_destroy); -EXPORT_SYMBOL(dmu_objset_snapshot); EXPORT_SYMBOL(dmu_objset_stats); EXPORT_SYMBOL(dmu_objset_fast_stat); EXPORT_SYMBOL(dmu_objset_spa); EXPORT_SYMBOL(dmu_objset_space); EXPORT_SYMBOL(dmu_objset_fsid_guid); EXPORT_SYMBOL(dmu_objset_find); -EXPORT_SYMBOL(dmu_objset_find_spa); -EXPORT_SYMBOL(dmu_objset_prefetch); EXPORT_SYMBOL(dmu_objset_byteswap); EXPORT_SYMBOL(dmu_objset_evict_dbufs); EXPORT_SYMBOL(dmu_objset_snap_cmtime); diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 6552e1d9d72d..2945be89b8d8 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -48,11 +48,14 @@ #include #include #include +#include +#include /* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */ int zfs_send_corrupt_data = B_FALSE; static char *dmu_recv_tag = "dmu_recv_tag"; +static const char *recv_clone_name = "%recv"; typedef struct dump_bytes_io { dmu_sendarg_t *dbi_dsp; @@ -319,7 +322,7 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) return (EINTR); - if (dsp->dsa_err) + if (dsp->dsa_err != 0) return (EINTR); return (0); } @@ -369,7 +372,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, uint64_t dnobj = (zb->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; err = dump_dnode(dsp, dnobj, blk+i); - if (err) + if (err != 0) break; } (void) arc_buf_remove_ref(abuf, &abuf); @@ -417,65 +420,33 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, } /* - * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. - * For example, they could both be snapshots of the same filesystem, and - * 'earlier' is before 'later'. Or 'earlier' could be the origin of - * 'later's filesystem. 
Or 'earlier' could be an older snapshot in the origin's - * filesystem. Or 'earlier' could be the origin's origin. + * Releases dp, ds, and fromds, using the specified tag. */ -static boolean_t -is_before(dsl_dataset_t *later, dsl_dataset_t *earlier) -{ - dsl_pool_t *dp = later->ds_dir->dd_pool; - int error; - boolean_t ret; - dsl_dataset_t *origin; - - if (earlier->ds_phys->ds_creation_txg >= - later->ds_phys->ds_creation_txg) - return (B_FALSE); - - if (later->ds_dir == earlier->ds_dir) - return (B_TRUE); - if (!dsl_dir_is_clone(later->ds_dir)) - return (B_FALSE); - - rw_enter(&dp->dp_config_rwlock, RW_READER); - if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) { - rw_exit(&dp->dp_config_rwlock); - return (B_TRUE); - } - error = dsl_dataset_hold_obj(dp, - later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); - rw_exit(&dp->dp_config_rwlock); - if (error != 0) - return (B_FALSE); - ret = is_before(origin, earlier); - dsl_dataset_rele(origin, FTAG); - return (ret); -} - -int -dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, - offset_t *off) +static int +dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, + dsl_dataset_t *fromds, int outfd, vnode_t *vp, offset_t *off) { - dsl_dataset_t *ds = tosnap->os_dsl_dataset; - dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; + objset_t *os; dmu_replay_record_t *drr; dmu_sendarg_t *dsp; int err; uint64_t fromtxg = 0; - /* tosnap must be a snapshot */ - if (ds->ds_phys->ds_next_snap_obj == 0) - return (EINVAL); - - /* - * fromsnap must be an earlier snapshot from the same fs as tosnap, - * or the origin's fs. - */ - if (fromds != NULL && !is_before(ds, fromds)) + if (fromds != NULL && !dsl_dataset_is_before(ds, fromds)) { + dsl_dataset_rele(fromds, tag); + dsl_dataset_rele(ds, tag); + dsl_pool_rele(dp, tag); return (EXDEV); + } + + err = dmu_objset_from_ds(ds, &os); + if (err != 0) { + if (fromds != NULL) + dsl_dataset_rele(fromds, tag); + dsl_dataset_rele(ds, tag); + dsl_pool_rele(dp, tag); + return (err); + } drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); drr->drr_type = DRR_BEGIN; @@ -484,13 +455,17 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, DMU_SUBSTREAM); #ifdef _KERNEL - if (dmu_objset_type(tosnap) == DMU_OST_ZFS) { + if (dmu_objset_type(os) == DMU_OST_ZFS) { uint64_t version; - if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) { + if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0) { kmem_free(drr, sizeof (dmu_replay_record_t)); + if (fromds != NULL) + dsl_dataset_rele(fromds, tag); + dsl_dataset_rele(ds, tag); + dsl_pool_rele(dp, tag); return (EINVAL); } - if (version == ZPL_VERSION_SA) { + if (version >= ZPL_VERSION_SA) { DMU_SET_FEATUREFLAGS( drr->drr_u.drr_begin.drr_versioninfo, DMU_BACKUP_FEATURE_SA_SPILL); @@ -500,19 +475,22 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, drr->drr_u.drr_begin.drr_creation_time = ds->ds_phys->ds_creation_time; - drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type; + drr->drr_u.drr_begin.drr_type = dmu_objset_type(os); if (fromds != NULL && ds->ds_dir != fromds->ds_dir) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; - if (fromds) + if (fromds != NULL) drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); - if (fromds) + if (fromds != 
NULL) { fromtxg = fromds->ds_phys->ds_creation_txg; + dsl_dataset_rele(fromds, tag); + fromds = NULL; + } dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); @@ -520,7 +498,7 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, dsp->dsa_vp = vp; dsp->dsa_outfd = outfd; dsp->dsa_proc = curproc; - dsp->dsa_os = tosnap; + dsp->dsa_os = os; dsp->dsa_off = off; dsp->dsa_toguid = ds->ds_phys->ds_guid; ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); @@ -535,6 +513,9 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, goto out; } + dsl_dataset_long_hold(ds, FTAG); + dsl_pool_rele(dp, tag); + err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, backup_cb, dsp); @@ -542,8 +523,8 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) err = EINTR; - if (err) { - if (err == EINTR && dsp->dsa_err) + if (err != 0) { + if (err == EINTR && dsp->dsa_err != 0) err = dsp->dsa_err; goto out; } @@ -566,27 +547,96 @@ dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, kmem_free(drr, sizeof (dmu_replay_record_t)); kmem_free(dsp, sizeof (dmu_sendarg_t)); + dsl_dataset_long_rele(ds, FTAG); + dsl_dataset_rele(ds, tag); + return (err); } int -dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep) +dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, + int outfd, vnode_t *vp, offset_t *off) +{ + dsl_pool_t *dp; + dsl_dataset_t *ds; + dsl_dataset_t *fromds = NULL; + int err; + + err = dsl_pool_hold(pool, FTAG, &dp); + if (err != 0) + return (err); + + err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds); + if (err != 0) { + dsl_pool_rele(dp, FTAG); + return (err); + } + + if (fromsnap != 0) { + err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds); + if (err != 0) { + dsl_dataset_rele(ds, FTAG); + dsl_pool_rele(dp, FTAG); + return (err); + } + } + + return (dmu_send_impl(FTAG, dp, ds, fromds, outfd, vp, off)); +} + +int +dmu_send(const char *tosnap, const char *fromsnap, + int outfd, vnode_t *vp, offset_t *off) +{ + dsl_pool_t *dp; + dsl_dataset_t *ds; + dsl_dataset_t *fromds = NULL; + int err; + + if (strchr(tosnap, '@') == NULL) + return (EINVAL); + if (fromsnap != NULL && strchr(fromsnap, '@') == NULL) + return (EINVAL); + + err = dsl_pool_hold(tosnap, FTAG, &dp); + if (err != 0) + return (err); + + err = dsl_dataset_hold(dp, tosnap, FTAG, &ds); + if (err != 0) { + dsl_pool_rele(dp, FTAG); + return (err); + } + + if (fromsnap != NULL) { + err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds); + if (err != 0) { + dsl_dataset_rele(ds, FTAG); + dsl_pool_rele(dp, FTAG); + return (err); + } + } + return (dmu_send_impl(FTAG, dp, ds, fromds, outfd, vp, off)); +} + +int +dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep) { - dsl_dataset_t *ds = tosnap->os_dsl_dataset; - dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; - dsl_pool_t *dp = ds->ds_dir->dd_pool; int err; uint64_t size, recordsize; + ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool); + + ASSERT(dsl_pool_config_held(dp)); /* tosnap must be a snapshot */ - if (ds->ds_phys->ds_next_snap_obj == 0) + if (!dsl_dataset_is_snapshot(ds)) return (EINVAL); /* * fromsnap must be an earlier snapshot from the same fs as tosnap, * or the origin's fs. */ - if (fromds != NULL && !is_before(ds, fromds)) + if (fromds != NULL && !dsl_dataset_is_before(ds, fromds)) return (EXDEV); /* Get uncompressed size estimate of changed data. 
*/ @@ -596,7 +646,7 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep) uint64_t used, comp; err = dsl_dataset_space_written(fromds, ds, &used, &comp, &size); - if (err) + if (err != 0) return (err); } @@ -615,11 +665,8 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep) * Therefore, space used by indirect blocks is sizeof(blkptr_t) per * block, which we observe in practice. */ - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_prop_get_ds(ds, "recordsize", - sizeof (recordsize), 1, &recordsize, NULL); - rw_exit(&dp->dp_config_rwlock); - if (err) + err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize); + if (err != 0) return (err); size -= size / recordsize * sizeof (blkptr_t); @@ -631,93 +678,40 @@ dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep) return (0); } -struct recvbeginsyncarg { - const char *tofs; - const char *tosnap; - dsl_dataset_t *origin; - uint64_t fromguid; - dmu_objset_type_t type; - void *tag; - boolean_t force; - uint64_t dsflags; - char clonelastname[MAXNAMELEN]; - dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */ - cred_t *cr; -}; - -/* ARGSUSED */ -static int -recv_new_check(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dir_t *dd = arg1; - struct recvbeginsyncarg *rbsa = arg2; - objset_t *mos = dd->dd_pool->dp_meta_objset; - uint64_t val; - int err; - - err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, - strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val); - - if (err != ENOENT) - return (err ? err : EEXIST); - - if (rbsa->origin) { - /* make sure it's a snap in the same pool */ - if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool) - return (EXDEV); - if (!dsl_dataset_is_snapshot(rbsa->origin)) - return (EINVAL); - if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid) - return (ENODEV); - } - - return (0); -} - -static void -recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dir_t *dd = arg1; - struct recvbeginsyncarg *rbsa = arg2; - uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; - uint64_t dsobj; - - /* Create and open new dataset. */ - dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1, - rbsa->origin, flags, rbsa->cr, tx); - VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj, - B_TRUE, dmu_recv_tag, &rbsa->ds)); - - if (rbsa->origin == NULL) { - (void) dmu_objset_create_impl(dd->dd_pool->dp_spa, - rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx); - } - - spa_history_log_internal_ds(rbsa->ds, "receive new", tx, ""); -} +typedef struct dmu_recv_begin_arg { + const char *drba_origin; + dmu_recv_cookie_t *drba_cookie; + cred_t *drba_cred; +} dmu_recv_begin_arg_t; -/* ARGSUSED */ static int -recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) +recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, + uint64_t fromguid) { - dsl_dataset_t *ds = arg1; - struct recvbeginsyncarg *rbsa = arg2; - int err; uint64_t val; + int error; + dsl_pool_t *dp = ds->ds_dir->dd_pool; /* must not have any changes since most recent snapshot */ - if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds)) + if (!drba->drba_cookie->drc_force && + dsl_dataset_modified_since_lastsnap(ds)) return (ETXTBSY); + /* temporary clone name must not exist */ + error = zap_lookup(dp->dp_meta_objset, + ds->ds_dir->dd_phys->dd_child_dir_zapobj, recv_clone_name, + 8, 1, &val); + if (error != ENOENT) + return (error == 0 ? 
EBUSY : error); + /* new snapshot name must not exist */ - err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, - ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val); - if (err == 0) - return (EEXIST); - if (err != ENOENT) - return (err); + error = zap_lookup(dp->dp_meta_objset, + ds->ds_phys->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap, + 8, 1, &val); + if (error != ENOENT) + return (error == 0 ? EEXIST : error); - if (rbsa->fromguid) { + if (fromguid != 0) { /* if incremental, most recent snapshot must match fromguid */ if (ds->ds_prev == NULL) return (ENODEV); @@ -726,20 +720,20 @@ recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) * most recent snapshot must match fromguid, or there are no * changes since the fromguid one */ - if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) { + if (ds->ds_prev->ds_phys->ds_guid != fromguid) { uint64_t birth = ds->ds_prev->ds_phys->ds_bp.blk_birth; uint64_t obj = ds->ds_prev->ds_phys->ds_prev_snap_obj; while (obj != 0) { dsl_dataset_t *snap; - err = dsl_dataset_hold_obj(ds->ds_dir->dd_pool, - obj, FTAG, &snap); - if (err) + error = dsl_dataset_hold_obj(dp, obj, FTAG, + &snap); + if (error != 0) return (ENODEV); if (snap->ds_phys->ds_creation_txg < birth) { dsl_dataset_rele(snap, FTAG); return (ENODEV); } - if (snap->ds_phys->ds_guid == rbsa->fromguid) { + if (snap->ds_phys->ds_guid == fromguid) { dsl_dataset_rele(snap, FTAG); break; /* it's ok */ } @@ -755,58 +749,153 @@ recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) return (ENODEV); } - /* temporary clone name must not exist */ - err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, - ds->ds_dir->dd_phys->dd_child_dir_zapobj, - rbsa->clonelastname, 8, 1, &val); - if (err == 0) - return (EEXIST); - if (err != ENOENT) - return (err); - return (0); + +} + +static int +dmu_recv_begin_check(void *arg, dmu_tx_t *tx) +{ + dmu_recv_begin_arg_t *drba = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + struct drr_begin *drrb = drba->drba_cookie->drc_drrb; + uint64_t fromguid = drrb->drr_fromguid; + int flags = drrb->drr_flags; + int error; + dsl_dataset_t *ds; + const char *tofs = drba->drba_cookie->drc_tofs; + + /* already checked */ + ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); + + if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == + DMU_COMPOUNDSTREAM || + drrb->drr_type >= DMU_OST_NUMTYPES || + ((flags & DRR_FLAG_CLONE) && drba->drba_origin == NULL)) + return (EINVAL); + + /* Verify pool version supports SA if SA_SPILL feature set */ + if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_SA_SPILL) && + spa_version(dp->dp_spa) < SPA_VERSION_SA) { + return (ENOTSUP); + } + + error = dsl_dataset_hold(dp, tofs, FTAG, &ds); + if (error == 0) { + /* target fs already exists; recv into temp clone */ + + /* Can't recv a clone into an existing fs */ + if (flags & DRR_FLAG_CLONE) { + dsl_dataset_rele(ds, FTAG); + return (EINVAL); + } + + error = recv_begin_check_existing_impl(drba, ds, fromguid); + dsl_dataset_rele(ds, FTAG); + } else if (error == ENOENT) { + /* target fs does not exist; must be a full backup or clone */ + char buf[MAXNAMELEN]; + + /* + * If it's a non-clone incremental, we are missing the + * target fs, so fail the recv. 
+ */ + if (fromguid != 0 && !(flags & DRR_FLAG_CLONE)) + return (ENOENT); + + /* Open the parent of tofs */ + ASSERT3U(strlen(tofs), <, MAXNAMELEN); + (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); + error = dsl_dataset_hold(dp, buf, FTAG, &ds); + if (error != 0) + return (error); + + if (drba->drba_origin != NULL) { + dsl_dataset_t *origin; + error = dsl_dataset_hold(dp, drba->drba_origin, + FTAG, &origin); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); + return (error); + } + if (!dsl_dataset_is_snapshot(origin)) { + dsl_dataset_rele(origin, FTAG); + dsl_dataset_rele(ds, FTAG); + return (EINVAL); + } + if (origin->ds_phys->ds_guid != fromguid) { + dsl_dataset_rele(origin, FTAG); + dsl_dataset_rele(ds, FTAG); + return (ENODEV); + } + dsl_dataset_rele(origin, FTAG); + } + dsl_dataset_rele(ds, FTAG); + error = 0; + } + return (error); } -/* ARGSUSED */ static void -recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *ohds = arg1; - struct recvbeginsyncarg *rbsa = arg2; - dsl_pool_t *dp = ohds->ds_dir->dd_pool; - dsl_dataset_t *cds; - uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; + dmu_recv_begin_arg_t *drba = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + struct drr_begin *drrb = drba->drba_cookie->drc_drrb; + const char *tofs = drba->drba_cookie->drc_tofs; + dsl_dataset_t *ds, *newds; uint64_t dsobj; + int error; + uint64_t crflags; + + crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ? + DS_FLAG_CI_DATASET : 0; - /* create and open the temporary clone */ - dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname, - ohds->ds_prev, flags, rbsa->cr, tx); - VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds)); + error = dsl_dataset_hold(dp, tofs, FTAG, &ds); + if (error == 0) { + /* create temporary clone */ + dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name, + ds->ds_prev, crflags, drba->drba_cred, tx); + dsl_dataset_rele(ds, FTAG); + } else { + dsl_dir_t *dd; + const char *tail; + dsl_dataset_t *origin = NULL; + + VERIFY0(dsl_dir_hold(dp, tofs, FTAG, &dd, &tail)); + + if (drba->drba_origin != NULL) { + VERIFY0(dsl_dataset_hold(dp, drba->drba_origin, + FTAG, &origin)); + } + + /* Create new dataset. */ + dsobj = dsl_dataset_create_sync(dd, + strrchr(tofs, '/') + 1, + origin, crflags, drba->drba_cred, tx); + if (origin != NULL) + dsl_dataset_rele(origin, FTAG); + dsl_dir_rele(dd, FTAG); + drba->drba_cookie->drc_newfs = B_TRUE; + } + VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds)); + + dmu_buf_will_dirty(newds->ds_dbuf, tx); + newds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; /* * If we actually created a non-clone, we need to create the * objset in our new dataset. 
*/ - if (BP_IS_HOLE(dsl_dataset_get_blkptr(cds))) { + if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) { (void) dmu_objset_create_impl(dp->dp_spa, - cds, dsl_dataset_get_blkptr(cds), rbsa->type, tx); + newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx); } - rbsa->ds = cds; - - spa_history_log_internal_ds(cds, "receive over existing", tx, ""); -} - -static boolean_t -dmu_recv_verify_features(dsl_dataset_t *ds, struct drr_begin *drrb) -{ - int featureflags; - - featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); + drba->drba_cookie->drc_ds = newds; - /* Verify pool version supports SA if SA_SPILL feature set */ - return ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) && - (spa_version(dsl_dataset_get_spa(ds)) < SPA_VERSION_SA)); + spa_history_log_internal_ds(newds, "receive", tx, ""); } /* @@ -814,132 +903,55 @@ dmu_recv_verify_features(dsl_dataset_t *ds, struct drr_begin *drrb) * succeeds; otherwise we will leak the holds on the datasets. */ int -dmu_recv_begin(char *tofs, char *tosnap, char *top_ds, struct drr_begin *drrb, - boolean_t force, objset_t *origin, dmu_recv_cookie_t *drc) +dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, + boolean_t force, char *origin, dmu_recv_cookie_t *drc) { - int err = 0; - boolean_t byteswap; - struct recvbeginsyncarg rbsa = { 0 }; - uint64_t versioninfo; - int flags; - dsl_dataset_t *ds; - - if (drrb->drr_magic == DMU_BACKUP_MAGIC) - byteswap = FALSE; - else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) - byteswap = TRUE; - else - return (EINVAL); - - rbsa.tofs = tofs; - rbsa.tosnap = tosnap; - rbsa.origin = origin ? origin->os_dsl_dataset : NULL; - rbsa.fromguid = drrb->drr_fromguid; - rbsa.type = drrb->drr_type; - rbsa.tag = FTAG; - rbsa.dsflags = 0; - rbsa.cr = CRED(); - versioninfo = drrb->drr_versioninfo; - flags = drrb->drr_flags; - - if (byteswap) { - rbsa.type = BSWAP_32(rbsa.type); - rbsa.fromguid = BSWAP_64(rbsa.fromguid); - versioninfo = BSWAP_64(versioninfo); - flags = BSWAP_32(flags); - } - - if (DMU_GET_STREAM_HDRTYPE(versioninfo) == DMU_COMPOUNDSTREAM || - rbsa.type >= DMU_OST_NUMTYPES || - ((flags & DRR_FLAG_CLONE) && origin == NULL)) - return (EINVAL); - - if (flags & DRR_FLAG_CI_DATA) - rbsa.dsflags = DS_FLAG_CI_DATASET; + dmu_recv_begin_arg_t drba = { 0 }; + dmu_replay_record_t *drr; bzero(drc, sizeof (dmu_recv_cookie_t)); drc->drc_drrb = drrb; drc->drc_tosnap = tosnap; - drc->drc_top_ds = top_ds; + drc->drc_tofs = tofs; drc->drc_force = force; - /* - * Process the begin in syncing context. 
- */ - - /* open the dataset we are logically receiving into */ - err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds); - if (err == 0) { - if (dmu_recv_verify_features(ds, drrb)) { - dsl_dataset_rele(ds, dmu_recv_tag); - return (ENOTSUP); - } - /* target fs already exists; recv into temp clone */ - - /* Can't recv a clone into an existing fs */ - if (flags & DRR_FLAG_CLONE) { - dsl_dataset_rele(ds, dmu_recv_tag); - return (EINVAL); - } - - /* must not have an incremental recv already in progress */ - if (!mutex_tryenter(&ds->ds_recvlock)) { - dsl_dataset_rele(ds, dmu_recv_tag); - return (EBUSY); - } - - /* tmp clone name is: tofs/%tosnap" */ - (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname), - "%%%s", tosnap); - rbsa.force = force; - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - recv_existing_check, recv_existing_sync, ds, &rbsa, 5); - if (err) { - mutex_exit(&ds->ds_recvlock); - dsl_dataset_rele(ds, dmu_recv_tag); - return (err); - } - drc->drc_logical_ds = ds; - drc->drc_real_ds = rbsa.ds; - } else if (err == ENOENT) { - /* target fs does not exist; must be a full backup or clone */ - char *cp; - - /* - * If it's a non-clone incremental, we are missing the - * target fs, so fail the recv. - */ - if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) - return (ENOENT); - - /* Open the parent of tofs */ - cp = strrchr(tofs, '/'); - *cp = '\0'; - err = dsl_dataset_hold(tofs, FTAG, &ds); - *cp = '/'; - if (err) - return (err); + if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) + drc->drc_byteswap = B_TRUE; + else if (drrb->drr_magic != DMU_BACKUP_MAGIC) + return (EINVAL); - if (dmu_recv_verify_features(ds, drrb)) { - dsl_dataset_rele(ds, FTAG); - return (ENOTSUP); - } + drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); + drr->drr_type = DRR_BEGIN; + drr->drr_u.drr_begin = *drc->drc_drrb; + if (drc->drc_byteswap) { + fletcher_4_incremental_byteswap(drr, + sizeof (dmu_replay_record_t), &drc->drc_cksum); + } else { + fletcher_4_incremental_native(drr, + sizeof (dmu_replay_record_t), &drc->drc_cksum); + } + kmem_free(drr, sizeof (dmu_replay_record_t)); - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - recv_new_check, recv_new_sync, ds->ds_dir, &rbsa, 5); - dsl_dataset_rele(ds, FTAG); - if (err) - return (err); - drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds; - drc->drc_newfs = B_TRUE; + if (drc->drc_byteswap) { + drrb->drr_magic = BSWAP_64(drrb->drr_magic); + drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); + drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); + drrb->drr_type = BSWAP_32(drrb->drr_type); + drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); + drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); } - return (err); + drba.drba_origin = origin; + drba.drba_cookie = drc; + drba.drba_cred = CRED(); + + return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync, + &drba, 5)); } struct restorearg { int err; - int byteswap; + boolean_t byteswap; vnode_t *vp; char *buf; uint64_t voff; @@ -975,7 +987,7 @@ free_guid_map_onexit(void *arg) guid_map_entry_t *gmep; while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { - dsl_dataset_rele(gmep->gme_ds, ca); + dsl_dataset_long_rele(gmep->gme_ds, gmep); kmem_free(gmep, sizeof (guid_map_entry_t)); } avl_destroy(ca); @@ -1003,7 +1015,7 @@ restore_read(struct restorearg *ra, int len) ra->err = EINVAL; ra->voff += len - done - resid; done = len - resid; - if (ra->err) + if (ra->err != 0) return (NULL); } @@ -1124,7 +1136,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) if 
(drro->drr_bonuslen) { data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); - if (ra->err) + if (ra->err != 0) return (ra->err); } @@ -1133,7 +1145,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) tx = dmu_tx_create(os); dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { + if (err != 0) { dmu_tx_abort(tx); return (err); } @@ -1147,14 +1159,14 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen); } - if (err) { + if (err != 0) { return (EINVAL); } tx = dmu_tx_create(os); dmu_tx_hold_bonus(tx, drro->drr_object); err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { + if (err != 0) { dmu_tx_abort(tx); return (err); } @@ -1202,7 +1214,7 @@ restore_freeobjects(struct restorearg *ra, objset_t *os, continue; err = dmu_free_object(os, obj); - if (err) + if (err != 0) return (err); } return (0); @@ -1232,7 +1244,7 @@ restore_write(struct restorearg *ra, objset_t *os, dmu_tx_hold_write(tx, drrw->drr_object, drrw->drr_offset, drrw->drr_length); err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { + if (err != 0) { dmu_tx_abort(tx); return (err); } @@ -1295,7 +1307,7 @@ restore_write_byref(struct restorearg *ra, objset_t *os, dmu_tx_hold_write(tx, drrwbr->drr_object, drrwbr->drr_offset, drrwbr->drr_length); err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { + if (err != 0) { dmu_tx_abort(tx); return (err); } @@ -1336,7 +1348,7 @@ restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) dmu_tx_hold_spill(tx, db->db_object); err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { + if (err != 0) { dmu_buf_rele(db, FTAG); dmu_buf_rele(db_spill, FTAG); dmu_tx_abort(tx); @@ -1375,6 +1387,16 @@ restore_free(struct restorearg *ra, objset_t *os, return (err); } +/* used to destroy the drc_ds on error */ +static void +dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc) +{ + char name[MAXNAMELEN]; + dsl_dataset_name(drc->drc_ds, name); + dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + (void) dsl_destroy_head(name); +} + /* * NB: callers *must* call dmu_recv_end() if this succeeds. 
*/ @@ -1388,52 +1410,24 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, zio_cksum_t pcksum; int featureflags; - if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) - ra.byteswap = TRUE; - - { - /* compute checksum of drr_begin record */ - dmu_replay_record_t *drr; - drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); - - drr->drr_type = DRR_BEGIN; - drr->drr_u.drr_begin = *drc->drc_drrb; - if (ra.byteswap) { - fletcher_4_incremental_byteswap(drr, - sizeof (dmu_replay_record_t), &ra.cksum); - } else { - fletcher_4_incremental_native(drr, - sizeof (dmu_replay_record_t), &ra.cksum); - } - kmem_free(drr, sizeof (dmu_replay_record_t)); - } - - if (ra.byteswap) { - struct drr_begin *drrb = drc->drc_drrb; - drrb->drr_magic = BSWAP_64(drrb->drr_magic); - drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); - drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); - drrb->drr_type = BSWAP_32(drrb->drr_type); - drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); - drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); - } - + ra.byteswap = drc->drc_byteswap; + ra.cksum = drc->drc_cksum; ra.vp = vp; ra.voff = *voffp; ra.bufsize = 1<<20; ra.buf = vmem_alloc(ra.bufsize, KM_SLEEP); /* these were verified in dmu_recv_begin */ - ASSERT(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo) == + ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==, DMU_SUBSTREAM); - ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES); + ASSERT3U(drc->drc_drrb->drr_type, <, DMU_OST_NUMTYPES); /* * Open the objset we are modifying. */ - VERIFY(dmu_objset_from_ds(drc->drc_real_ds, &os) == 0); + VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os)); - ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); + ASSERT(drc->drc_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); @@ -1446,7 +1440,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, goto out; } ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); - if (ra.err) { + if (ra.err != 0) { cleanup_fd = -1; goto out; } @@ -1460,12 +1454,12 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, ra.err = zfs_onexit_add_cb(minor, free_guid_map_onexit, ra.guid_to_ds_map, action_handlep); - if (ra.err) + if (ra.err != 0) goto out; } else { ra.err = zfs_onexit_cb_data(minor, *action_handlep, (void **)&ra.guid_to_ds_map); - if (ra.err) + if (ra.err != 0) goto out; } @@ -1559,14 +1553,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, * destroy what we created, so we don't leave it in the * inconsistent restoring state. 
*/ - txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0); - - (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, - B_FALSE); - if (drc->drc_real_ds != drc->drc_logical_ds) { - mutex_exit(&drc->drc_logical_ds->ds_recvlock); - dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag); - } + dmu_recv_cleanup_ds(drc); } vmem_free(ra.buf, ra.bufsize); @@ -1574,142 +1561,179 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, return (ra.err); } -struct recvendsyncarg { - char *tosnap; - uint64_t creation_time; - uint64_t toguid; -}; - static int -recv_end_check(void *arg1, void *arg2, dmu_tx_t *tx) +dmu_recv_end_check(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - struct recvendsyncarg *resa = arg2; + dmu_recv_cookie_t *drc = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + int error; + + ASSERT3P(drc->drc_ds->ds_owner, ==, dmu_recv_tag); + + if (!drc->drc_newfs) { + dsl_dataset_t *origin_head; + + error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head); + if (error != 0) + return (error); + error = dsl_dataset_clone_swap_check_impl(drc->drc_ds, + origin_head, drc->drc_force); + if (error != 0) { + dsl_dataset_rele(origin_head, FTAG); + return (error); + } + error = dsl_dataset_snapshot_check_impl(origin_head, + drc->drc_tosnap, tx); + dsl_dataset_rele(origin_head, FTAG); + if (error != 0) + return (error); - return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx)); + error = dsl_destroy_head_check_impl(drc->drc_ds, 1); + } else { + error = dsl_dataset_snapshot_check_impl(drc->drc_ds, + drc->drc_tosnap, tx); + } + return (error); } static void -recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dmu_recv_end_sync(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - struct recvendsyncarg *resa = arg2; + dmu_recv_cookie_t *drc = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + + spa_history_log_internal_ds(drc->drc_ds, "finish receiving", + tx, "snap=%s", drc->drc_tosnap); + + if (!drc->drc_newfs) { + dsl_dataset_t *origin_head; + + VERIFY0(dsl_dataset_hold(dp, drc->drc_tofs, FTAG, + &origin_head)); + dsl_dataset_clone_swap_sync_impl(drc->drc_ds, + origin_head, tx); + dsl_dataset_snapshot_sync_impl(origin_head, + drc->drc_tosnap, tx); + + /* set snapshot's creation time and guid */ + dmu_buf_will_dirty(origin_head->ds_prev->ds_dbuf, tx); + origin_head->ds_prev->ds_phys->ds_creation_time = + drc->drc_drrb->drr_creation_time; + origin_head->ds_prev->ds_phys->ds_guid = + drc->drc_drrb->drr_toguid; + origin_head->ds_prev->ds_phys->ds_flags &= + ~DS_FLAG_INCONSISTENT; + + dmu_buf_will_dirty(origin_head->ds_dbuf, tx); + origin_head->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + + dsl_dataset_rele(origin_head, FTAG); + dsl_destroy_head_sync_impl(drc->drc_ds, tx); + } else { + dsl_dataset_t *ds = drc->drc_ds; - dsl_dataset_snapshot_sync(ds, resa->tosnap, tx); + dsl_dataset_snapshot_sync_impl(ds, drc->drc_tosnap, tx); - /* set snapshot's creation time and guid */ - dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); - ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time; - ds->ds_prev->ds_phys->ds_guid = resa->toguid; - ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + /* set snapshot's creation time and guid */ + dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); + ds->ds_prev->ds_phys->ds_creation_time = + drc->drc_drrb->drr_creation_time; + ds->ds_prev->ds_phys->ds_guid = drc->drc_drrb->drr_toguid; + ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; - dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; - 
spa_history_log_internal_ds(ds, "finished receiving", tx, ""); + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + } + drc->drc_newsnapobj = drc->drc_ds->ds_phys->ds_prev_snap_obj; + /* + * Release the hold from dmu_recv_begin. This must be done before + * we return to open context, so that when we free the dataset's dnode, + * we can evict its bonus buffer. + */ + dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + drc->drc_ds = NULL; } static int -add_ds_to_guidmap(avl_tree_t *guid_map, dsl_dataset_t *ds) +add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj) { - dsl_pool_t *dp = ds->ds_dir->dd_pool; - uint64_t snapobj = ds->ds_phys->ds_prev_snap_obj; + dsl_pool_t *dp; dsl_dataset_t *snapds; guid_map_entry_t *gmep; int err; ASSERT(guid_map != NULL); - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dataset_hold_obj(dp, snapobj, guid_map, &snapds); + err = dsl_pool_hold(name, FTAG, &dp); + if (err != 0) + return (err); + err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snapds); if (err == 0) { gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP); gmep->guid = snapds->ds_phys->ds_guid; gmep->gme_ds = snapds; avl_add(guid_map, gmep); + dsl_dataset_long_hold(snapds, gmep); + dsl_dataset_rele(snapds, FTAG); } - rw_exit(&dp->dp_config_rwlock); + dsl_pool_rele(dp, FTAG); return (err); } +static int dmu_recv_end_modified_blocks = 3; + static int dmu_recv_existing_end(dmu_recv_cookie_t *drc) { - struct recvendsyncarg resa; - dsl_dataset_t *ds = drc->drc_logical_ds; - int err, myerr; - - if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) { - err = dsl_dataset_clone_swap(drc->drc_real_ds, ds, - drc->drc_force); - if (err) - goto out; - } else { - mutex_exit(&ds->ds_recvlock); - dsl_dataset_rele(ds, dmu_recv_tag); - (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, - B_FALSE); - return (EBUSY); - } + int error; - resa.creation_time = drc->drc_drrb->drr_creation_time; - resa.toguid = drc->drc_drrb->drr_toguid; - resa.tosnap = drc->drc_tosnap; +#ifdef _KERNEL + char *name; - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - recv_end_check, recv_end_sync, ds, &resa, 3); - if (err) { - /* swap back */ - (void) dsl_dataset_clone_swap(drc->drc_real_ds, ds, B_TRUE); - } + /* + * We will be destroying the ds; make sure its origin is unmounted if + * necessary. + */ + name = kmem_alloc(MAXNAMELEN, KM_SLEEP); + dsl_dataset_name(drc->drc_ds, name); + zfs_destroy_unmount_origin(name); + kmem_free(name, MAXNAMELEN); +#endif -out: - mutex_exit(&ds->ds_recvlock); - if (err == 0 && drc->drc_guid_to_ds_map != NULL) - (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); - dsl_dataset_disown(ds, dmu_recv_tag); - myerr = dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); - ASSERT0(myerr); - return (err); + error = dsl_sync_task(drc->drc_tofs, + dmu_recv_end_check, dmu_recv_end_sync, drc, + dmu_recv_end_modified_blocks); + + if (error != 0) + dmu_recv_cleanup_ds(drc); + return (error); } static int dmu_recv_new_end(dmu_recv_cookie_t *drc) { - struct recvendsyncarg resa; - dsl_dataset_t *ds = drc->drc_logical_ds; - int err; - - /* - * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() - * expects it to have a ds_user_ptr (and zil), but clone_swap() - * can close it. 
- */ - txg_wait_synced(ds->ds_dir->dd_pool, 0); + int error; - resa.creation_time = drc->drc_drrb->drr_creation_time; - resa.toguid = drc->drc_drrb->drr_toguid; - resa.tosnap = drc->drc_tosnap; + error = dsl_sync_task(drc->drc_tofs, + dmu_recv_end_check, dmu_recv_end_sync, drc, + dmu_recv_end_modified_blocks); - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - recv_end_check, recv_end_sync, ds, &resa, 3); - if (err) { - /* clean up the fs we just recv'd into */ - (void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE); - } else { - if (drc->drc_guid_to_ds_map != NULL) - (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); - /* release the hold from dmu_recv_begin */ - dsl_dataset_disown(ds, dmu_recv_tag); + if (error != 0) { + dmu_recv_cleanup_ds(drc); + } else if (drc->drc_guid_to_ds_map != NULL) { + (void) add_ds_to_guidmap(drc->drc_tofs, + drc->drc_guid_to_ds_map, + drc->drc_newsnapobj); } - return (err); + return (error); } int dmu_recv_end(dmu_recv_cookie_t *drc) { - if (drc->drc_logical_ds != drc->drc_real_ds) - return (dmu_recv_existing_end(drc)); - else + if (drc->drc_newfs) return (dmu_recv_new_end(drc)); + else + return (dmu_recv_existing_end(drc)); } diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c index 1c3972371437..32b3e50fc005 100644 --- a/module/zfs/dmu_traverse.c +++ b/module/zfs/dmu_traverse.c @@ -265,7 +265,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); - if (err) + if (err != 0) return (err); cbp = buf->b_data; @@ -282,7 +282,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, zb->zb_level - 1, zb->zb_blkid * epb + i); err = traverse_visitbp(td, dnp, &cbp[i], &czb); - if (err) { + if (err != 0) { if (!hard) break; lasterr = err; @@ -295,7 +295,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); - if (err) + if (err != 0) return (err); dnp = buf->b_data; @@ -308,7 +308,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, for (i = 0; i < epb; i++) { err = traverse_dnode(td, &dnp[i], zb->zb_objset, zb->zb_blkid * epb + i); - if (err) { + if (err != 0) { if (!hard) break; lasterr = err; @@ -321,7 +321,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); - if (err) + if (err != 0) return (err); osp = buf->b_data; @@ -405,7 +405,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, for (j = 0; j < dnp->dn_nblkptr; j++) { SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j); err = traverse_visitbp(td, dnp, &dnp->dn_blkptr[j], &czb); - if (err) { + if (err != 0) { if (!hard) break; lasterr = err; @@ -415,7 +415,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID); err = traverse_visitbp(td, dnp, &dnp->dn_spill, &czb); - if (err) { + if (err != 0) { if (!hard) return (err); lasterr = err; @@ -518,14 +518,20 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, cv_init(&pd->pd_cv, NULL, CV_DEFAULT, NULL); /* See comment on ZIL traversal in dsl_scan_visitds. 
*/ - if (ds != NULL && !dsl_dataset_is_snapshot(ds)) { - objset_t *os; + if (ds != NULL && !dsl_dataset_is_snapshot(ds) && !BP_IS_HOLE(rootbp)) { + uint32_t flags = ARC_WAIT; + objset_phys_t *osp; + arc_buf_t *buf; - err = dmu_objset_from_ds(ds, &os); - if (err) + err = arc_read(NULL, td->td_spa, rootbp, + arc_getbuf_func, &buf, + ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, NULL); + if (err != 0) return (err); - traverse_zil(td, &os->os_zil_header); + osp = buf->b_data; + traverse_zil(td, &osp->os_zil_header); + (void) arc_buf_remove_ref(buf, &buf); } if (!(flags & TRAVERSE_PREFETCH_DATA) || @@ -591,7 +597,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags, /* visit the MOS */ err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa), txg_start, NULL, flags, func, arg); - if (err) + if (err != 0) return (err); /* visit each dataset */ @@ -600,7 +606,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags, dmu_object_info_t doi; err = dmu_object_info(mos, obj, &doi); - if (err) { + if (err != 0) { if (!hard) return (err); lasterr = err; @@ -611,10 +617,10 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags, dsl_dataset_t *ds; uint64_t txg = txg_start; - rw_enter(&dp->dp_config_rwlock, RW_READER); + dsl_pool_config_enter(dp, FTAG); err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); - rw_exit(&dp->dp_config_rwlock); - if (err) { + dsl_pool_config_exit(dp, FTAG); + if (err != 0) { if (!hard) return (err); lasterr = err; @@ -624,7 +630,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags, txg = ds->ds_phys->ds_prev_snap_txg; err = traverse_dataset(ds, txg, flags, func, arg); dsl_dataset_rele(ds, FTAG); - if (err) { + if (err != 0) { if (!hard) return (err); lasterr = err; diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index 30867f9d76cb..3e46a02f8e57 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -917,7 +917,7 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db) #endif static int -dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how) +dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how) { dmu_tx_hold_t *txh; spa_t *spa = tx->tx_pool->dp_spa; @@ -985,15 +985,6 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how) fudge += txh->txh_fudge; } - /* - * NB: This check must be after we've held the dnodes, so that - * the dmu_tx_unassign() logic will work properly - */ - if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg) { - DMU_TX_STAT_BUMP(dmu_tx_how); - return (ERESTART); - } - /* * If a snapshot has been taken since we made our estimates, * assume that we won't be able to free or overwrite anything. @@ -1076,29 +1067,28 @@ dmu_tx_unassign(dmu_tx_t *tx) * * (1) TXG_WAIT. If the current open txg is full, waits until there's * a new one. This should be used when you're not holding locks. - * If will only fail if we're truly out of space (or over quota). + * It will only fail if we're truly out of space (or over quota). * * (2) TXG_NOWAIT. If we can't assign into the current open txg without * blocking, returns immediately with ERESTART. This should be used * whenever you're holding locks. On an ERESTART error, the caller * should drop locks, do a dmu_tx_wait(tx), and try again. - * - * (3) A specific txg. Use this if you need to ensure that multiple - * transactions all sync in the same txg. Like TXG_NOWAIT, it - * returns ERESTART if it can't assign you into the requested txg. 
*/ int -dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how) +dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how) { hrtime_t before, after; int err; ASSERT(tx->tx_txg == 0); - ASSERT(txg_how != 0); + ASSERT(txg_how == TXG_WAIT || txg_how == TXG_NOWAIT); ASSERT(!dsl_pool_sync_context(tx->tx_pool)); before = gethrtime(); + /* If we might wait, we must not hold the config lock. */ + ASSERT(txg_how != TXG_WAIT || !dsl_pool_config_held(tx->tx_pool)); + while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) { dmu_tx_unassign(tx); @@ -1124,6 +1114,7 @@ dmu_tx_wait(dmu_tx_t *tx) spa_t *spa = tx->tx_pool->dp_spa; ASSERT(tx->tx_txg == 0); + ASSERT(!dsl_pool_config_held(tx->tx_pool)); /* * It's possible that the pool has become active after this thread @@ -1250,6 +1241,14 @@ dmu_tx_get_txg(dmu_tx_t *tx) return (tx->tx_txg); } +dsl_pool_t * +dmu_tx_pool(dmu_tx_t *tx) +{ + ASSERT(tx->tx_pool != NULL); + return (tx->tx_pool); +} + + void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data) { diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index d8d66513d528..d88134d72b01 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -74,7 +74,11 @@ dnode_cons(void *arg, void *unused, int kmflag) mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL); cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL); - refcount_create(&dn->dn_holds); + /* + * Every dbuf has a reference, and dropping a tracked reference is + * O(number of references), so don't track dn_holds. + */ + refcount_create_untracked(&dn->dn_holds); refcount_create(&dn->dn_tx_holds); list_link_init(&dn->dn_link); diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index 76e603753cae..a1c71d4870a6 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -481,6 +481,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]); dnode_evict_dbufs(dn); ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL); + ASSERT3P(dn->dn_bonus, ==, NULL); /* * XXX - It would be nice to assert this, but we may still diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 2eca2b2044e2..5c0ca4d96225 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ @@ -45,12 +45,8 @@ #include #include #include - -static char *dsl_reaper = "the grim reaper"; - -static dsl_checkfunc_t dsl_dataset_destroy_begin_check; -static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; -static dsl_syncfunc_t dsl_dataset_set_reservation_sync; +#include +#include #define SWITCH64(x, y) \ { \ @@ -63,9 +59,6 @@ static dsl_syncfunc_t dsl_dataset_set_reservation_sync; #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE -#define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper) - - /* * Figure out how much of this delta should be propogated to the dsl_dir * layer. 
If there's a refreservation, that space has already been @@ -256,7 +249,7 @@ dsl_dataset_evict(dmu_buf_t *db, void *dsv) { dsl_dataset_t *ds = dsv; - ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds)); + ASSERT(ds->ds_owner == NULL); unique_remove(ds->ds_fsid_guid); @@ -264,32 +257,26 @@ dsl_dataset_evict(dmu_buf_t *db, void *dsv) dmu_objset_evict(ds->ds_objset); if (ds->ds_prev) { - dsl_dataset_drop_ref(ds->ds_prev, ds); + dsl_dataset_rele(ds->ds_prev, ds); ds->ds_prev = NULL; } bplist_destroy(&ds->ds_pending_deadlist); - if (db != NULL) { + if (ds->ds_phys->ds_deadlist_obj != 0) dsl_deadlist_close(&ds->ds_deadlist); - } else { - ASSERT(ds->ds_deadlist.dl_dbuf == NULL); - ASSERT(!ds->ds_deadlist.dl_oldfmt); - } if (ds->ds_dir) - dsl_dir_close(ds->ds_dir, ds); + dsl_dir_rele(ds->ds_dir, ds); ASSERT(!list_link_active(&ds->ds_synced_link)); mutex_destroy(&ds->ds_lock); - mutex_destroy(&ds->ds_recvlock); mutex_destroy(&ds->ds_opening_lock); - rw_destroy(&ds->ds_rwlock); - cv_destroy(&ds->ds_exclusive_cv); + refcount_destroy(&ds->ds_longholds); kmem_free(ds, sizeof (dsl_dataset_t)); } -static int +int dsl_dataset_get_snapname(dsl_dataset_t *ds) { dsl_dataset_phys_t *headphys; @@ -305,7 +292,7 @@ dsl_dataset_get_snapname(dsl_dataset_t *ds) err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &headdbuf); - if (err) + if (err != 0) return (err); headphys = headdbuf->db_data; err = zap_value_search(dp->dp_meta_objset, @@ -334,8 +321,8 @@ dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) return (err); } -static int -dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx) +int +dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; @@ -355,8 +342,8 @@ dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx) return (err); } -static int -dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, +int +dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, dsl_dataset_t **dsp) { objset_t *mos = dp->dp_meta_objset; @@ -365,11 +352,10 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, int err; dmu_object_info_t doi; - ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || - dsl_pool_sync_context(dp)); + ASSERT(dsl_pool_config_held(dp)); err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); - if (err) + if (err != 0) return (err); /* Make sure dsobj has the correct object type. 
*/ @@ -388,12 +374,9 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, list_link_init(&ds->ds_synced_link); mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); - - rw_init(&ds->ds_rwlock, NULL, RW_DEFAULT, NULL); - cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); + refcount_create(&ds->ds_longholds); bplist_create(&ds->ds_pending_deadlist); dsl_deadlist_open(&ds->ds_deadlist, @@ -403,15 +386,13 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, offsetof(dmu_sendarg_t, dsa_link)); if (err == 0) { - err = dsl_dir_open_obj(dp, + err = dsl_dir_hold_obj(dp, ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); } - if (err) { + if (err != 0) { mutex_destroy(&ds->ds_lock); - mutex_destroy(&ds->ds_recvlock); mutex_destroy(&ds->ds_opening_lock); - rw_destroy(&ds->ds_rwlock); - cv_destroy(&ds->ds_exclusive_cv); + refcount_destroy(&ds->ds_longholds); bplist_destroy(&ds->ds_pending_deadlist); dsl_deadlist_close(&ds->ds_deadlist); kmem_free(ds, sizeof (dsl_dataset_t)); @@ -421,8 +402,8 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, if (!dsl_dataset_is_snapshot(ds)) { ds->ds_snapname[0] = '\0'; - if (ds->ds_phys->ds_prev_snap_obj) { - err = dsl_dataset_get_ref(dp, + if (ds->ds_phys->ds_prev_snap_obj != 0) { + err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev); } @@ -438,29 +419,14 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, } if (err == 0 && !dsl_dataset_is_snapshot(ds)) { - /* - * In sync context, we're called with either no lock - * or with the write lock. If we're not syncing, - * we're always called with the read lock held. 
- */ - boolean_t need_lock = - !RW_WRITE_HELD(&dp->dp_config_rwlock) && - dsl_pool_sync_context(dp); - - if (need_lock) - rw_enter(&dp->dp_config_rwlock, RW_READER); - - err = dsl_prop_get_ds(ds, - "refreservation", sizeof (uint64_t), 1, - &ds->ds_reserved, NULL); + err = dsl_prop_get_int_ds(ds, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), + &ds->ds_reserved); if (err == 0) { - err = dsl_prop_get_ds(ds, - "refquota", sizeof (uint64_t), 1, - &ds->ds_quota, NULL); + err = dsl_prop_get_int_ds(ds, + zfs_prop_to_name(ZFS_PROP_REFQUOTA), + &ds->ds_quota); } - - if (need_lock) - rw_exit(&dp->dp_config_rwlock); } else { ds->ds_reserved = ds->ds_quota = 0; } @@ -473,15 +439,13 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, bplist_destroy(&ds->ds_pending_deadlist); dsl_deadlist_close(&ds->ds_deadlist); if (ds->ds_prev) - dsl_dataset_drop_ref(ds->ds_prev, ds); - dsl_dir_close(ds->ds_dir, ds); + dsl_dataset_rele(ds->ds_prev, ds); + dsl_dir_rele(ds->ds_dir, ds); mutex_destroy(&ds->ds_lock); - mutex_destroy(&ds->ds_recvlock); mutex_destroy(&ds->ds_opening_lock); - rw_destroy(&ds->ds_rwlock); - cv_destroy(&ds->ds_exclusive_cv); + refcount_destroy(&ds->ds_longholds); kmem_free(ds, sizeof (dsl_dataset_t)); - if (err) { + if (err != 0) { dmu_buf_rele(dbuf, tag); return (err); } @@ -496,170 +460,118 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); - mutex_enter(&ds->ds_lock); - if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) { - mutex_exit(&ds->ds_lock); - dmu_buf_rele(ds->ds_dbuf, tag); - return (ENOENT); - } - mutex_exit(&ds->ds_lock); *dsp = ds; return (0); } -static int -dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag) -{ - dsl_pool_t *dp = ds->ds_dir->dd_pool; - - /* - * In syncing context we don't want the rwlock lock: there - * may be an existing writer waiting for sync phase to - * finish. We don't need to worry about such writers, since - * sync phase is single-threaded, so the writer can't be - * doing anything while we are active. - */ - if (dsl_pool_sync_context(dp)) { - ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); - return (0); - } - - /* - * Normal users will hold the ds_rwlock as a READER until they - * are finished (i.e., call dsl_dataset_rele()). "Owners" will - * drop their READER lock after they set the ds_owner field. - * - * If the dataset is being destroyed, the destroy thread will - * obtain a WRITER lock for exclusive access after it's done its - * open-context work and then change the ds_owner to - * dsl_reaper once destruction is assured. So threads - * may block here temporarily, until the "destructability" of - * the dataset is determined. - */ - ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock)); - mutex_enter(&ds->ds_lock); - while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) { - rw_exit(&dp->dp_config_rwlock); - cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock); - if (DSL_DATASET_IS_DESTROYED(ds)) { - mutex_exit(&ds->ds_lock); - dsl_dataset_drop_ref(ds, tag); - rw_enter(&dp->dp_config_rwlock, RW_READER); - return (ENOENT); - } - /* - * The dp_config_rwlock lives above the ds_lock. And - * we need to check DSL_DATASET_IS_DESTROYED() while - * holding the ds_lock, so we have to drop and reacquire - * the ds_lock here. 
- */ - mutex_exit(&ds->ds_lock); - rw_enter(&dp->dp_config_rwlock, RW_READER); - mutex_enter(&ds->ds_lock); - } - mutex_exit(&ds->ds_lock); - return (0); -} - -int -dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, - dsl_dataset_t **dsp) -{ - int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp); - - if (err) - return (err); - return (dsl_dataset_hold_ref(*dsp, tag)); -} - int -dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok, +dsl_dataset_hold(dsl_pool_t *dp, const char *name, void *tag, dsl_dataset_t **dsp) -{ - int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); - if (err) - return (err); - if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { - dsl_dataset_rele(*dsp, tag); - *dsp = NULL; - return (EBUSY); - } - return (0); -} - -int -dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp) { dsl_dir_t *dd; - dsl_pool_t *dp; const char *snapname; uint64_t obj; int err = 0; - err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname); - if (err) + err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname); + if (err != 0) return (err); - dp = dd->dd_pool; + ASSERT(dsl_pool_config_held(dp)); obj = dd->dd_phys->dd_head_dataset_obj; - rw_enter(&dp->dp_config_rwlock, RW_READER); - if (obj) - err = dsl_dataset_get_ref(dp, obj, tag, dsp); + if (obj != 0) + err = dsl_dataset_hold_obj(dp, obj, tag, dsp); else err = ENOENT; - if (err) - goto out; - - err = dsl_dataset_hold_ref(*dsp, tag); /* we may be looking for a snapshot */ if (err == 0 && snapname != NULL) { - dsl_dataset_t *ds = NULL; + dsl_dataset_t *ds; if (*snapname++ != '@') { dsl_dataset_rele(*dsp, tag); - err = ENOENT; - goto out; + dsl_dir_rele(dd, FTAG); + return (ENOENT); } dprintf("looking for snapshot '%s'\n", snapname); err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); if (err == 0) - err = dsl_dataset_get_ref(dp, obj, tag, &ds); + err = dsl_dataset_hold_obj(dp, obj, tag, &ds); dsl_dataset_rele(*dsp, tag); - ASSERT3U((err == 0), ==, (ds != NULL)); - - if (ds) { + if (err == 0) { mutex_enter(&ds->ds_lock); if (ds->ds_snapname[0] == 0) (void) strlcpy(ds->ds_snapname, snapname, sizeof (ds->ds_snapname)); mutex_exit(&ds->ds_lock); - err = dsl_dataset_hold_ref(ds, tag); - *dsp = err ? NULL : ds; + *dsp = ds; } } -out: - rw_exit(&dp->dp_config_rwlock); - dsl_dir_close(dd, FTAG); + + dsl_dir_rele(dd, FTAG); return (err); } int -dsl_dataset_own(const char *name, boolean_t inconsistentok, +dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, dsl_dataset_t **dsp) { - int err = dsl_dataset_hold(name, tag, dsp); - if (err) + int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); + if (err != 0) + return (err); + if (!dsl_dataset_tryown(*dsp, tag)) { + dsl_dataset_rele(*dsp, tag); + *dsp = NULL; + return (EBUSY); + } + return (0); +} + +int +dsl_dataset_own(dsl_pool_t *dp, const char *name, + void *tag, dsl_dataset_t **dsp) +{ + int err = dsl_dataset_hold(dp, name, tag, dsp); + if (err != 0) return (err); - if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { + if (!dsl_dataset_tryown(*dsp, tag)) { dsl_dataset_rele(*dsp, tag); return (EBUSY); } return (0); } +/* + * See the comment above dsl_pool_hold() for details. In summary, a long + * hold is used to prevent destruction of a dataset while the pool hold + * is dropped, allowing other concurrent operations (e.g. spa_sync()). + * + * The dataset and pool must be held when this function is called. After it + * is called, the pool hold may be released while the dataset is still held + * and accessed. 
+ */ +void +dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag) +{ + ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); + (void) refcount_add(&ds->ds_longholds, tag); +} + +void +dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag) +{ + (void) refcount_remove(&ds->ds_longholds, tag); +} + +/* Return B_TRUE if there are any long holds on this dataset. */ +boolean_t +dsl_dataset_long_held(dsl_dataset_t *ds) +{ + return (!refcount_is_zero(&ds->ds_longholds)); +} + void dsl_dataset_name(dsl_dataset_t *ds, char *name) { @@ -667,7 +579,7 @@ dsl_dataset_name(dsl_dataset_t *ds, char *name) (void) strcpy(name, "mos"); } else { dsl_dir_name(ds->ds_dir, name); - VERIFY(0 == dsl_dataset_get_snapname(ds)); + VERIFY0(dsl_dataset_get_snapname(ds)); if (ds->ds_snapname[0]) { (void) strcat(name, "@"); /* @@ -685,90 +597,42 @@ dsl_dataset_name(dsl_dataset_t *ds, char *name) } } -static int -dsl_dataset_namelen(dsl_dataset_t *ds) -{ - int result; - - if (ds == NULL) { - result = 3; /* "mos" */ - } else { - result = dsl_dir_namelen(ds->ds_dir); - VERIFY(0 == dsl_dataset_get_snapname(ds)); - if (ds->ds_snapname[0]) { - ++result; /* adding one for the @-sign */ - if (!MUTEX_HELD(&ds->ds_lock)) { - mutex_enter(&ds->ds_lock); - result += strlen(ds->ds_snapname); - mutex_exit(&ds->ds_lock); - } else { - result += strlen(ds->ds_snapname); - } - } - } - - return (result); -} - -void -dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag) -{ - dmu_buf_rele(ds->ds_dbuf, tag); -} - void dsl_dataset_rele(dsl_dataset_t *ds, void *tag) { - if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) { - rw_exit(&ds->ds_rwlock); - } - dsl_dataset_drop_ref(ds, tag); + dmu_buf_rele(ds->ds_dbuf, tag); } void dsl_dataset_disown(dsl_dataset_t *ds, void *tag) { - ASSERT((ds->ds_owner == tag && ds->ds_dbuf) || - (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL)); + ASSERT(ds->ds_owner == tag && ds->ds_dbuf != NULL); mutex_enter(&ds->ds_lock); ds->ds_owner = NULL; - if (RW_WRITE_HELD(&ds->ds_rwlock)) { - rw_exit(&ds->ds_rwlock); - cv_broadcast(&ds->ds_exclusive_cv); - } mutex_exit(&ds->ds_lock); - if (ds->ds_dbuf) - dsl_dataset_drop_ref(ds, tag); + dsl_dataset_long_rele(ds, tag); + if (ds->ds_dbuf != NULL) + dsl_dataset_rele(ds, tag); else dsl_dataset_evict(NULL, ds); } boolean_t -dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag) +dsl_dataset_tryown(dsl_dataset_t *ds, void *tag) { boolean_t gotit = FALSE; mutex_enter(&ds->ds_lock); - if (ds->ds_owner == NULL && - (!DS_IS_INCONSISTENT(ds) || inconsistentok)) { + if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) { ds->ds_owner = tag; - if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) - rw_exit(&ds->ds_rwlock); + dsl_dataset_long_hold(ds, tag); gotit = TRUE; } mutex_exit(&ds->ds_lock); return (gotit); } -void -dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner) -{ - ASSERT3P(owner, ==, ds->ds_owner); - if (!RW_WRITE_HELD(&ds->ds_rwlock)) - rw_enter(&ds->ds_rwlock, RW_WRITER); -} - uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, uint64_t flags, dmu_tx_t *tx) @@ -789,7 +653,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); - VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); + VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); dsphys = dbuf->db_data; bzero(dsphys, sizeof (dsl_dataset_phys_t)); @@ -807,7 +671,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, if 
(origin == NULL) { dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); } else { - dsl_dataset_t *ohds; + dsl_dataset_t *ohds; /* head of the origin snapshot */ dsphys->ds_prev_snap_obj = origin->ds_object; dsphys->ds_prev_snap_txg = @@ -824,7 +688,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, dmu_buf_will_dirty(origin->ds_dbuf, tx); origin->ds_phys->ds_num_children++; - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, + VERIFY0(dsl_dataset_hold_obj(dp, origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds)); dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); @@ -836,9 +700,8 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, zap_create(mos, DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); } - VERIFY(0 == zap_add_int(mos, - origin->ds_phys->ds_next_clones_obj, - dsobj, tx)); + VERIFY0(zap_add_int(mos, + origin->ds_phys->ds_next_clones_obj, dsobj, tx)); } dmu_buf_will_dirty(dd->dd_dbuf, tx); @@ -850,7 +713,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, zap_create(mos, DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } - VERIFY3U(0, ==, zap_add_int(mos, + VERIFY0(zap_add_int(mos, origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); } } @@ -866,6 +729,16 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, return (dsobj); } +static void +dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) +{ + objset_t *os; + + VERIFY0(dmu_objset_from_ds(ds, &os)); + bzero(&os->os_zil_header, sizeof (os->os_zil_header)); + dsl_dataset_dirty(ds, tx); +} + uint64_t dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) @@ -874,29 +747,28 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, uint64_t dsobj, ddobj; dsl_dir_t *dd; + ASSERT(dmu_tx_is_syncing(tx)); ASSERT(lastname[0] != '@'); ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); - VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); + VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd)); - dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx); + dsobj = dsl_dataset_create_sync_dd(dd, origin, + flags & ~DS_CREATE_FLAG_NODIRTY, tx); dsl_deleg_set_create_perms(dd, tx, cr); - dsl_dir_close(dd, FTAG); + dsl_dir_rele(dd, FTAG); /* * If we are creating a clone, make sure we zero out any stale * data from the origin snapshots zil header. */ - if (origin != NULL) { + if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) { dsl_dataset_t *ds; - objset_t *os; - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); - VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os)); - bzero(&os->os_zil_header, sizeof (os->os_zil_header)); - dsl_dataset_dirty(ds, tx); + VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); + dsl_dataset_zero_zil(ds, tx); dsl_dataset_rele(ds, FTAG); } @@ -904,304 +776,97 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, } /* - * The snapshots must all be in the same pool. + * The unique space in the head dataset can be calculated by subtracting + * the space used in the most recent snapshot, that is still being used + * in this file system, from the space currently in use. To figure out + * the space in the most recent snapshot still in use, we need to take + * the total space used in the snapshot and subtract out the space that + * has been freed up since the snapshot was taken. 
*/ -int -dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, - nvlist_t *errlist) +void +dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) { - int err; - dsl_sync_task_t *dst; - spa_t *spa; - nvpair_t *pair; - dsl_sync_task_group_t *dstg; - - pair = nvlist_next_nvpair(snaps, NULL); - if (pair == NULL) - return (0); - - err = spa_open(nvpair_name(pair), &spa, FTAG); - if (err) - return (err); - dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - - for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; - pair = nvlist_next_nvpair(snaps, pair)) { - dsl_dataset_t *ds; - - err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds); - if (err == 0) { - struct dsl_ds_destroyarg *dsda; - - dsl_dataset_make_exclusive(ds, dstg); - dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), - KM_SLEEP); - dsda->ds = ds; - dsda->defer = defer; - dsl_sync_task_create(dstg, dsl_dataset_destroy_check, - dsl_dataset_destroy_sync, dsda, dstg, 0); - } else if (err == ENOENT) { - err = 0; - } else { - fnvlist_add_int32(errlist, nvpair_name(pair), err); - break; - } - } + uint64_t mrs_used; + uint64_t dlused, dlcomp, dluncomp; - if (err == 0) - err = dsl_sync_task_group_wait(dstg); + ASSERT(!dsl_dataset_is_snapshot(ds)); - for (dst = list_head(&dstg->dstg_tasks); dst; - dst = list_next(&dstg->dstg_tasks, dst)) { - struct dsl_ds_destroyarg *dsda = dst->dst_arg1; - dsl_dataset_t *ds = dsda->ds; + if (ds->ds_phys->ds_prev_snap_obj != 0) + mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes; + else + mrs_used = 0; - /* - * Return the snapshots that triggered the error. - */ - if (dst->dst_err != 0) { - char name[ZFS_MAXNAMELEN]; - dsl_dataset_name(ds, name); - fnvlist_add_int32(errlist, name, dst->dst_err); - } - ASSERT3P(dsda->rm_origin, ==, NULL); - dsl_dataset_disown(ds, dstg); - kmem_free(dsda, sizeof (struct dsl_ds_destroyarg)); - } + dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); - dsl_sync_task_group_destroy(dstg); - spa_close(spa, FTAG); - return (err); + ASSERT3U(dlused, <=, mrs_used); + ds->ds_phys->ds_unique_bytes = + ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused); + if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= + SPA_VERSION_UNIQUE_ACCURATE) + ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; } -static boolean_t -dsl_dataset_might_destroy_origin(dsl_dataset_t *ds) +void +dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, + dmu_tx_t *tx) { - boolean_t might_destroy = B_FALSE; + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + int err; + ASSERTV(uint64_t count); + + ASSERT(ds->ds_phys->ds_num_children >= 2); + err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx); + /* + * The err should not be ENOENT, but a bug in a previous version + * of the code could cause upgrade_clones_cb() to not set + * ds_next_snap_obj when it should, leading to a missing entry. + * If we knew that the pool was created after + * SPA_VERSION_NEXT_CLONES, we could assert that it isn't + * ENOENT. However, at least we can check that we don't have + * too many entries in the next_clones_obj even after failing to + * remove this one. 
+ */ + if (err != ENOENT) + VERIFY0(err); + ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj, + &count)); + ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); +} - mutex_enter(&ds->ds_lock); - if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 && - DS_IS_DEFER_DESTROY(ds)) - might_destroy = B_TRUE; - mutex_exit(&ds->ds_lock); - return (might_destroy); +blkptr_t * +dsl_dataset_get_blkptr(dsl_dataset_t *ds) +{ + return (&ds->ds_phys->ds_bp); } -/* - * If we're removing a clone, and these three conditions are true: - * 1) the clone's origin has no other children - * 2) the clone's origin has no user references - * 3) the clone's origin has been marked for deferred destruction - * Then, prepare to remove the origin as part of this sync task group. - */ -static int -dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag) +void +dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) { - dsl_dataset_t *ds = dsda->ds; - dsl_dataset_t *origin = ds->ds_prev; - - if (dsl_dataset_might_destroy_origin(origin)) { - char *name; - int namelen; - int error; - - namelen = dsl_dataset_namelen(origin) + 1; - name = kmem_alloc(namelen, KM_SLEEP); - dsl_dataset_name(origin, name); -#ifdef _KERNEL - error = zfs_unmount_snap(name, NULL); - if (error) { - kmem_free(name, namelen); - return (error); - } -#endif - error = dsl_dataset_own(name, B_TRUE, tag, &origin); - kmem_free(name, namelen); - if (error) - return (error); - dsda->rm_origin = origin; - dsl_dataset_make_exclusive(origin, tag); + ASSERT(dmu_tx_is_syncing(tx)); + /* If it's the meta-objset, set dp_meta_rootbp */ + if (ds == NULL) { + tx->tx_pool->dp_meta_rootbp = *bp; + } else { + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_bp = *bp; } +} - return (0); +spa_t * +dsl_dataset_get_spa(dsl_dataset_t *ds) +{ + return (ds->ds_dir->dd_pool->dp_spa); } -/* - * ds must be opened as OWNER. On return (whether successful or not), - * ds will be closed and caller can no longer dereference it. - */ -int -dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) +void +dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) { - int err; - dsl_sync_task_group_t *dstg; - objset_t *os; - dsl_dir_t *dd; - uint64_t obj; - struct dsl_ds_destroyarg dsda = { 0 }; + dsl_pool_t *dp; - dsda.ds = ds; - - if (dsl_dataset_is_snapshot(ds)) { - /* Destroying a snapshot is simpler */ - dsl_dataset_make_exclusive(ds, tag); - - dsda.defer = defer; - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - dsl_dataset_destroy_check, dsl_dataset_destroy_sync, - &dsda, tag, 0); - ASSERT3P(dsda.rm_origin, ==, NULL); - goto out; - } else if (defer) { - err = EINVAL; - goto out; - } - - dd = ds->ds_dir; - - if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds), - &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { - /* - * Check for errors and mark this ds as inconsistent, in - * case we crash while freeing the objects. - */ - err = dsl_sync_task_do(dd->dd_pool, - dsl_dataset_destroy_begin_check, - dsl_dataset_destroy_begin_sync, ds, NULL, 0); - if (err) - goto out; - - err = dmu_objset_from_ds(ds, &os); - if (err) - goto out; - - /* - * Remove all objects while in the open context so that - * there is less work to do in the syncing context. - */ - for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, - ds->ds_phys->ds_prev_snap_txg)) { - /* - * Ignore errors, if there is not enough disk space - * we will deal with it in dsl_dataset_destroy_sync(). 
- */ - (void) dmu_free_object(os, obj); - } - if (err != ESRCH) - goto out; - - /* - * Sync out all in-flight IO. - */ - txg_wait_synced(dd->dd_pool, 0); - - /* - * If we managed to free all the objects in open - * context, the user space accounting should be zero. - */ - if (ds->ds_phys->ds_bp.blk_fill == 0 && - dmu_objset_userused_enabled(os)) { - ASSERTV(uint64_t count); - - ASSERT(zap_count(os, DMU_USERUSED_OBJECT, - &count) != 0 || count == 0); - ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, - &count) != 0 || count == 0); - } - } - - rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); - err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); - rw_exit(&dd->dd_pool->dp_config_rwlock); - - if (err) - goto out; - - /* - * Blow away the dsl_dir + head dataset. - */ - dsl_dataset_make_exclusive(ds, tag); - /* - * If we're removing a clone, we might also need to remove its - * origin. - */ - do { - dsda.need_prep = B_FALSE; - if (dsl_dir_is_clone(dd)) { - err = dsl_dataset_origin_rm_prep(&dsda, tag); - if (err) { - dsl_dir_close(dd, FTAG); - goto out; - } - } - - dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); - dsl_sync_task_create(dstg, dsl_dataset_destroy_check, - dsl_dataset_destroy_sync, &dsda, tag, 0); - dsl_sync_task_create(dstg, dsl_dir_destroy_check, - dsl_dir_destroy_sync, dd, FTAG, 0); - err = dsl_sync_task_group_wait(dstg); - dsl_sync_task_group_destroy(dstg); - - /* - * We could be racing against 'zfs release' or 'zfs destroy -d' - * on the origin snap, in which case we can get EBUSY if we - * needed to destroy the origin snap but were not ready to - * do so. - */ - if (dsda.need_prep) { - ASSERT(err == EBUSY); - ASSERT(dsl_dir_is_clone(dd)); - ASSERT(dsda.rm_origin == NULL); - } - } while (dsda.need_prep); - - if (dsda.rm_origin != NULL) - dsl_dataset_disown(dsda.rm_origin, tag); - - /* if it is successful, dsl_dir_destroy_sync will close the dd */ - if (err) - dsl_dir_close(dd, FTAG); - -out: - dsl_dataset_disown(ds, tag); - return (err); -} - -blkptr_t * -dsl_dataset_get_blkptr(dsl_dataset_t *ds) -{ - return (&ds->ds_phys->ds_bp); -} - -void -dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) -{ - ASSERT(dmu_tx_is_syncing(tx)); - /* If it's the meta-objset, set dp_meta_rootbp */ - if (ds == NULL) { - tx->tx_pool->dp_meta_rootbp = *bp; - } else { - dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_bp = *bp; - } -} - -spa_t * -dsl_dataset_get_spa(dsl_dataset_t *ds) -{ - return (ds->ds_dir->dd_pool->dp_spa); -} - -void -dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) -{ - dsl_pool_t *dp; - - if (ds == NULL) /* this is the meta-objset */ - return; + if (ds == NULL) /* this is the meta-objset */ + return; ASSERT(ds->ds_objset != NULL); @@ -1210,7 +875,7 @@ dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) dp = ds->ds_dir->dd_pool; - if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { + if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) { /* up the hold count until we can be written out */ dmu_buf_add_ref(ds->ds_dbuf, ds); } @@ -1229,850 +894,145 @@ dsl_dataset_is_dirty(dsl_dataset_t *ds) return (B_FALSE); } -/* - * The unique space in the head dataset can be calculated by subtracting - * the space used in the most recent snapshot, that is still being used - * in this file system, from the space currently in use. 
To figure out - * the space in the most recent snapshot still in use, we need to take - * the total space used in the snapshot and subtract out the space that - * has been freed up since the snapshot was taken. - */ -static void -dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) -{ - uint64_t mrs_used; - uint64_t dlused, dlcomp, dluncomp; - - ASSERT(!dsl_dataset_is_snapshot(ds)); - - if (ds->ds_phys->ds_prev_snap_obj != 0) - mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes; - else - mrs_used = 0; - - dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); - - ASSERT3U(dlused, <=, mrs_used); - ds->ds_phys->ds_unique_bytes = - ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused); - - if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= - SPA_VERSION_UNIQUE_ACCURATE) - ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; -} - -struct killarg { - dsl_dataset_t *ds; - dmu_tx_t *tx; -}; - -/* ARGSUSED */ static int -kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, - const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) +dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) { - struct killarg *ka = arg; - dmu_tx_t *tx = ka->tx; + uint64_t asize; - if (bp == NULL) + if (!dmu_tx_is_syncing(tx)) return (0); - if (zb->zb_level == ZB_ZIL_LEVEL) { - ASSERT(zilog != NULL); - /* - * It's a block in the intent log. It has no - * accounting, so just free it. - */ - dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); - } else { - ASSERT(zilog == NULL); - ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg); - (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); - } - - return (0); -} - -/* ARGSUSED */ -static int -dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; - objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - uint64_t count; - int err; - /* - * Can't delete a head dataset if there are snapshots of it. - * (Except if the only snapshots are from the branch we cloned - * from.) + * If there's an fs-only reservation, any blocks that might become + * owned by the snapshot dataset must be accommodated by space + * outside of the reservation. */ - if (ds->ds_prev != NULL && - ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) - return (EBUSY); + ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); + asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); + if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) + return (ENOSPC); /* - * This is really a dsl_dir thing, but check it here so that - * we'll be less likely to leave this dataset inconsistent & - * nearly destroyed. + * Propagate any reserved space for this snapshot to other + * snapshot checks in this sync group. 
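+ *
+ * For example (illustrative numbers only): with refreservation=10G and
+ * 4G of data unique to the head, asize is 4G.  The snapshot is refused
+ * with ENOSPC above unless 4G is available outside the reservation, and
+ * otherwise the 4G is charged via dsl_dir_willuse_space() below so that
+ * other snapshot checks in this sync group see it as already spoken for.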
*/ - err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); - if (err) - return (err); - if (count != 0) - return (EEXIST); + if (asize > 0) + dsl_dir_willuse_space(ds->ds_dir, asize, tx); return (0); } -/* ARGSUSED */ -static void -dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; - - /* Mark it as inconsistent on-disk, in case we crash */ - dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; - - spa_history_log_internal_ds(ds, "destroy begin", tx, ""); -} - -static int -dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag, - dmu_tx_t *tx) -{ - dsl_dataset_t *ds = dsda->ds; - dsl_dataset_t *ds_prev = ds->ds_prev; - - if (dsl_dataset_might_destroy_origin(ds_prev)) { - struct dsl_ds_destroyarg ndsda = {0}; - - /* - * If we're not prepared to remove the origin, don't remove - * the clone either. - */ - if (dsda->rm_origin == NULL) { - dsda->need_prep = B_TRUE; - return (EBUSY); - } - - ndsda.ds = ds_prev; - ndsda.is_origin_rm = B_TRUE; - return (dsl_dataset_destroy_check(&ndsda, tag, tx)); - } - - /* - * If we're not going to remove the origin after all, - * undo the open context setup. - */ - if (dsda->rm_origin != NULL) { - dsl_dataset_disown(dsda->rm_origin, tag); - dsda->rm_origin = NULL; - } - - return (0); -} +typedef struct dsl_dataset_snapshot_arg { + nvlist_t *ddsa_snaps; + nvlist_t *ddsa_props; + nvlist_t *ddsa_errors; +} dsl_dataset_snapshot_arg_t; -/* - * If you add new checks here, you may need to add - * additional checks to the "temporary" case in - * snapshot_check() in dmu_objset.c. - */ -/* ARGSUSED */ int -dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname, + dmu_tx_t *tx) { - struct dsl_ds_destroyarg *dsda = arg1; - dsl_dataset_t *ds = dsda->ds; + int error; + uint64_t value; - /* we have an owner hold, so noone else can destroy us */ - ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); + ds->ds_trysnap_txg = tx->tx_txg; - /* - * Only allow deferred destroy on pools that support it. - * NOTE: deferred destroy is only supported on snapshots. - */ - if (dsda->defer) { - if (spa_version(ds->ds_dir->dd_pool->dp_spa) < - SPA_VERSION_USERREFS) - return (ENOTSUP); - ASSERT(dsl_dataset_is_snapshot(ds)); + if (!dmu_tx_is_syncing(tx)) return (0); - } /* - * Can't delete a head dataset if there are snapshots of it. - * (Except if the only snapshots are from the branch we cloned - * from.) + * We don't allow multiple snapshots of the same txg. If there + * is already one, try again. */ - if (ds->ds_prev != NULL && - ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) - return (EBUSY); + if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) + return (EAGAIN); /* - * If we made changes this txg, traverse_dsl_dataset won't find - * them. Try again. + * Check for conflicting snapshot name. */ - if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) - return (EAGAIN); - - if (dsl_dataset_is_snapshot(ds)) { - /* - * If this snapshot has an elevated user reference count, - * we can't destroy it yet. - */ - if (ds->ds_userrefs > 0 && !dsda->releasing) - return (EBUSY); + error = dsl_dataset_snap_lookup(ds, snapname, &value); + if (error == 0) + return (EEXIST); + if (error != ENOENT) + return (error); - mutex_enter(&ds->ds_lock); - /* - * Can't delete a branch point. 
However, if we're destroying - * a clone and removing its origin due to it having a user - * hold count of 0 and having been marked for deferred destroy, - * it's OK for the origin to have a single clone. - */ - if (ds->ds_phys->ds_num_children > - (dsda->is_origin_rm ? 2 : 1)) { - mutex_exit(&ds->ds_lock); - return (EEXIST); - } - mutex_exit(&ds->ds_lock); - } else if (dsl_dir_is_clone(ds->ds_dir)) { - return (dsl_dataset_origin_check(dsda, arg2, tx)); - } + error = dsl_dataset_snapshot_reserve_space(ds, tx); + if (error != 0) + return (error); - /* XXX we should do some i/o error checking... */ return (0); } -struct refsarg { - kmutex_t lock; - boolean_t gone; - kcondvar_t cv; -}; - -/* ARGSUSED */ -static void -dsl_dataset_refs_gone(dmu_buf_t *db, void *argv) +static int +dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx) { - struct refsarg *arg = argv; - - mutex_enter(&arg->lock); - arg->gone = TRUE; - cv_signal(&arg->cv); - mutex_exit(&arg->lock); -} + dsl_dataset_snapshot_arg_t *ddsa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + int rv = 0; -static void -dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag) -{ - struct refsarg arg; + for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); + pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { + int error = 0; + dsl_dataset_t *ds; + char *name, *atp; + char dsname[MAXNAMELEN]; + + name = nvpair_name(pair); + if (strlen(name) >= MAXNAMELEN) + error = ENAMETOOLONG; + if (error == 0) { + atp = strchr(name, '@'); + if (atp == NULL) + error = EINVAL; + if (error == 0) + (void) strlcpy(dsname, name, atp - name + 1); + } + if (error == 0) + error = dsl_dataset_hold(dp, dsname, FTAG, &ds); + if (error == 0) { + error = dsl_dataset_snapshot_check_impl(ds, + atp + 1, tx); + dsl_dataset_rele(ds, FTAG); + } - mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&arg.cv, NULL, CV_DEFAULT, NULL); - arg.gone = FALSE; - (void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys, - dsl_dataset_refs_gone); - dmu_buf_rele(ds->ds_dbuf, tag); - mutex_enter(&arg.lock); - while (!arg.gone) - cv_wait(&arg.cv, &arg.lock); - ASSERT(arg.gone); - mutex_exit(&arg.lock); - ds->ds_dbuf = NULL; - ds->ds_phys = NULL; - mutex_destroy(&arg.lock); - cv_destroy(&arg.cv); + if (error != 0) { + if (ddsa->ddsa_errors != NULL) { + fnvlist_add_int32(ddsa->ddsa_errors, + name, error); + } + rv = error; + } + } + return (rv); } -static void -remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx) +void +dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, + dmu_tx_t *tx) { - objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - int err; - ASSERTV(uint64_t count); + dsl_pool_t *dp = ds->ds_dir->dd_pool; + dmu_buf_t *dbuf; + dsl_dataset_phys_t *dsphys; + uint64_t dsobj, crtxg; + objset_t *mos = dp->dp_meta_objset; + ASSERTV(static zil_header_t zero_zil); + ASSERTV(objset_t *os); + + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); - ASSERT(ds->ds_phys->ds_num_children >= 2); - err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx); /* - * The err should not be ENOENT, but a bug in a previous version - * of the code could cause upgrade_clones_cb() to not set - * ds_next_snap_obj when it should, leading to a missing entry. - * If we knew that the pool was created after - * SPA_VERSION_NEXT_CLONES, we could assert that it isn't - * ENOENT. However, at least we can check that we don't have - * too many entries in the next_clones_obj even after failing to - * remove this one. 
+ * If we are on an old pool, the zil must not be active, in which + * case it will be zeroed. Usually zil_suspend() accomplishes this. */ - if (err != ENOENT) { - VERIFY0(err); - } - ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, - &count)); - ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); -} + ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP || + dmu_objset_from_ds(ds, &os) != 0 || + bcmp(&os->os_phys->os_zil_header, &zero_zil, + sizeof (zero_zil)) == 0); -static void -dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx) -{ - objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - zap_cursor_t zc; - zap_attribute_t za; /* - * If it is the old version, dd_clones doesn't exist so we can't - * find the clones, but deadlist_remove_key() is a no-op so it - * doesn't matter. - */ - if (ds->ds_dir->dd_phys->dd_clones == 0) - return; - - for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones); - zap_cursor_retrieve(&zc, &za) == 0; - zap_cursor_advance(&zc)) { - dsl_dataset_t *clone; - - VERIFY3U(0, ==, dsl_dataset_hold_obj(ds->ds_dir->dd_pool, - za.za_first_integer, FTAG, &clone)); - if (clone->ds_dir->dd_origin_txg > mintxg) { - dsl_deadlist_remove_key(&clone->ds_deadlist, - mintxg, tx); - dsl_dataset_remove_clones_key(clone, mintxg, tx); - } - dsl_dataset_rele(clone, FTAG); - } - zap_cursor_fini(&zc); -} - -struct process_old_arg { - dsl_dataset_t *ds; - dsl_dataset_t *ds_prev; - boolean_t after_branch_point; - zio_t *pio; - uint64_t used, comp, uncomp; -}; - -static int -process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) -{ - struct process_old_arg *poa = arg; - dsl_pool_t *dp = poa->ds->ds_dir->dd_pool; - - if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) { - dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx); - if (poa->ds_prev && !poa->after_branch_point && - bp->blk_birth > - poa->ds_prev->ds_phys->ds_prev_snap_txg) { - poa->ds_prev->ds_phys->ds_unique_bytes += - bp_get_dsize_sync(dp->dp_spa, bp); - } - } else { - poa->used += bp_get_dsize_sync(dp->dp_spa, bp); - poa->comp += BP_GET_PSIZE(bp); - poa->uncomp += BP_GET_UCSIZE(bp); - dsl_free_sync(poa->pio, dp, tx->tx_txg, bp); - } - return (0); -} - -static void -process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, - dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx) -{ - struct process_old_arg poa = { 0 }; - dsl_pool_t *dp = ds->ds_dir->dd_pool; - objset_t *mos = dp->dp_meta_objset; - - ASSERT(ds->ds_deadlist.dl_oldfmt); - ASSERT(ds_next->ds_deadlist.dl_oldfmt); - - poa.ds = ds; - poa.ds_prev = ds_prev; - poa.after_branch_point = after_branch_point; - poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); - VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, - process_old_cb, &poa, tx)); - VERIFY0(zio_wait(poa.pio)); - ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes); - - /* change snapused */ - dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, - -poa.used, -poa.comp, -poa.uncomp, tx); - - /* swap next's deadlist to our deadlist */ - dsl_deadlist_close(&ds->ds_deadlist); - dsl_deadlist_close(&ds_next->ds_deadlist); - SWITCH64(ds_next->ds_phys->ds_deadlist_obj, - ds->ds_phys->ds_deadlist_obj); - dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); - dsl_deadlist_open(&ds_next->ds_deadlist, mos, - ds_next->ds_phys->ds_deadlist_obj); -} - -static int -old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) -{ - int err; - struct killarg ka; - - /* - * Free everything that 
we point to (that's born after - * the previous snapshot, if we are a clone) - * - * NB: this should be very quick, because we already - * freed all the objects in open context. - */ - ka.ds = ds; - ka.tx = tx; - err = traverse_dataset(ds, - ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST, - kill_blkptr, &ka); - ASSERT0(err); - ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0); - - return (err); -} - -void -dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) -{ - struct dsl_ds_destroyarg *dsda = arg1; - dsl_dataset_t *ds = dsda->ds; - int err = 0; - int after_branch_point = FALSE; - dsl_pool_t *dp = ds->ds_dir->dd_pool; - objset_t *mos = dp->dp_meta_objset; - dsl_dataset_t *ds_prev = NULL; - boolean_t wont_destroy; - uint64_t obj; - - wont_destroy = (dsda->defer && - (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)); - - ASSERT(ds->ds_owner || wont_destroy); - ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1); - ASSERT(ds->ds_prev == NULL || - ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); - ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); - - if (wont_destroy) { - ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); - dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; - spa_history_log_internal_ds(ds, "defer_destroy", tx, ""); - return; - } - - /* We need to log before removing it from the namespace. */ - spa_history_log_internal_ds(ds, "destroy", tx, ""); - - /* signal any waiters that this dataset is going away */ - mutex_enter(&ds->ds_lock); - ds->ds_owner = dsl_reaper; - cv_broadcast(&ds->ds_exclusive_cv); - mutex_exit(&ds->ds_lock); - - /* Remove our reservation */ - if (ds->ds_reserved != 0) { - dsl_prop_setarg_t psa; - uint64_t value = 0; - - dsl_prop_setarg_init_uint64(&psa, "refreservation", - (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), - &value); - psa.psa_effective_value = 0; /* predict default value */ - - dsl_dataset_set_reservation_sync(ds, &psa, tx); - ASSERT0(ds->ds_reserved); - } - - ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); - - dsl_scan_ds_destroyed(ds, tx); - - obj = ds->ds_object; - - if (ds->ds_phys->ds_prev_snap_obj != 0) { - if (ds->ds_prev) { - ds_prev = ds->ds_prev; - } else { - VERIFY(0 == dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev)); - } - after_branch_point = - (ds_prev->ds_phys->ds_next_snap_obj != obj); - - dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); - if (after_branch_point && - ds_prev->ds_phys->ds_next_clones_obj != 0) { - remove_from_next_clones(ds_prev, obj, tx); - if (ds->ds_phys->ds_next_snap_obj != 0) { - VERIFY(0 == zap_add_int(mos, - ds_prev->ds_phys->ds_next_clones_obj, - ds->ds_phys->ds_next_snap_obj, tx)); - } - } - if (after_branch_point && - ds->ds_phys->ds_next_snap_obj == 0) { - /* This clone is toast. */ - ASSERT(ds_prev->ds_phys->ds_num_children > 1); - ds_prev->ds_phys->ds_num_children--; - - /* - * If the clone's origin has no other clones, no - * user holds, and has been marked for deferred - * deletion, then we should have done the necessary - * destroy setup for it. 
- */ - if (ds_prev->ds_phys->ds_num_children == 1 && - ds_prev->ds_userrefs == 0 && - DS_IS_DEFER_DESTROY(ds_prev)) { - ASSERT3P(dsda->rm_origin, !=, NULL); - } else { - ASSERT3P(dsda->rm_origin, ==, NULL); - } - } else if (!after_branch_point) { - ds_prev->ds_phys->ds_next_snap_obj = - ds->ds_phys->ds_next_snap_obj; - } - } - - if (dsl_dataset_is_snapshot(ds)) { - dsl_dataset_t *ds_next; - uint64_t old_unique; - uint64_t used = 0, comp = 0, uncomp = 0; - - VERIFY(0 == dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next)); - ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); - - old_unique = ds_next->ds_phys->ds_unique_bytes; - - dmu_buf_will_dirty(ds_next->ds_dbuf, tx); - ds_next->ds_phys->ds_prev_snap_obj = - ds->ds_phys->ds_prev_snap_obj; - ds_next->ds_phys->ds_prev_snap_txg = - ds->ds_phys->ds_prev_snap_txg; - ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, - ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); - - - if (ds_next->ds_deadlist.dl_oldfmt) { - process_old_deadlist(ds, ds_prev, ds_next, - after_branch_point, tx); - } else { - /* Adjust prev's unique space. */ - if (ds_prev && !after_branch_point) { - dsl_deadlist_space_range(&ds_next->ds_deadlist, - ds_prev->ds_phys->ds_prev_snap_txg, - ds->ds_phys->ds_prev_snap_txg, - &used, &comp, &uncomp); - ds_prev->ds_phys->ds_unique_bytes += used; - } - - /* Adjust snapused. */ - dsl_deadlist_space_range(&ds_next->ds_deadlist, - ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, - &used, &comp, &uncomp); - dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, - -used, -comp, -uncomp, tx); - - /* Move blocks to be freed to pool's free list. */ - dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, - &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg, - tx); - dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, - DD_USED_HEAD, used, comp, uncomp, tx); - - /* Merge our deadlist into next's and free it. */ - dsl_deadlist_merge(&ds_next->ds_deadlist, - ds->ds_phys->ds_deadlist_obj, tx); - } - dsl_deadlist_close(&ds->ds_deadlist); - dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); - - /* Collapse range in clone heads */ - dsl_dataset_remove_clones_key(ds, - ds->ds_phys->ds_creation_txg, tx); - - if (dsl_dataset_is_snapshot(ds_next)) { - dsl_dataset_t *ds_nextnext; - dsl_dataset_t *hds; - - /* - * Update next's unique to include blocks which - * were previously shared by only this snapshot - * and it. Those blocks will be born after the - * prev snap and before this snap, and will have - * died after the next snap and before the one - * after that (ie. be on the snap after next's - * deadlist). - */ - VERIFY(0 == dsl_dataset_hold_obj(dp, - ds_next->ds_phys->ds_next_snap_obj, - FTAG, &ds_nextnext)); - dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, - ds->ds_phys->ds_prev_snap_txg, - ds->ds_phys->ds_creation_txg, - &used, &comp, &uncomp); - ds_next->ds_phys->ds_unique_bytes += used; - dsl_dataset_rele(ds_nextnext, FTAG); - ASSERT3P(ds_next->ds_prev, ==, NULL); - - /* Collapse range in this head. 
*/ - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_head_dataset_obj, - FTAG, &hds)); - dsl_deadlist_remove_key(&hds->ds_deadlist, - ds->ds_phys->ds_creation_txg, tx); - dsl_dataset_rele(hds, FTAG); - - } else { - ASSERT3P(ds_next->ds_prev, ==, ds); - dsl_dataset_drop_ref(ds_next->ds_prev, ds_next); - ds_next->ds_prev = NULL; - if (ds_prev) { - VERIFY(0 == dsl_dataset_get_ref(dp, - ds->ds_phys->ds_prev_snap_obj, - ds_next, &ds_next->ds_prev)); - } - - dsl_dataset_recalc_head_uniq(ds_next); - - /* - * Reduce the amount of our unconsmed refreservation - * being charged to our parent by the amount of - * new unique data we have gained. - */ - if (old_unique < ds_next->ds_reserved) { - int64_t mrsdelta; - uint64_t new_unique = - ds_next->ds_phys->ds_unique_bytes; - - ASSERT(old_unique <= new_unique); - mrsdelta = MIN(new_unique - old_unique, - ds_next->ds_reserved - old_unique); - dsl_dir_diduse_space(ds->ds_dir, - DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); - } - } - dsl_dataset_rele(ds_next, FTAG); - } else { - zfeature_info_t *async_destroy = - &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]; - objset_t *os; - - /* - * There's no next snapshot, so this is a head dataset. - * Destroy the deadlist. Unless it's a clone, the - * deadlist should be empty. (If it's a clone, it's - * safe to ignore the deadlist contents.) - */ - dsl_deadlist_close(&ds->ds_deadlist); - dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); - ds->ds_phys->ds_deadlist_obj = 0; - - VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os)); - - if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) { - err = old_synchronous_dataset_destroy(ds, tx); - } else { - /* - * Move the bptree into the pool's list of trees to - * clean up and update space accounting information. - */ - uint64_t used, comp, uncomp; - - zil_destroy_sync(dmu_objset_zil(os), tx); - - if (!spa_feature_is_active(dp->dp_spa, async_destroy)) { - spa_feature_incr(dp->dp_spa, async_destroy, tx); - dp->dp_bptree_obj = bptree_alloc(mos, tx); - VERIFY(zap_add(mos, - DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, - &dp->dp_bptree_obj, tx) == 0); - } - - used = ds->ds_dir->dd_phys->dd_used_bytes; - comp = ds->ds_dir->dd_phys->dd_compressed_bytes; - uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes; - - ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || - ds->ds_phys->ds_unique_bytes == used); - - bptree_add(mos, dp->dp_bptree_obj, - &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg, - used, comp, uncomp, tx); - dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, - -used, -comp, -uncomp, tx); - dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, - used, comp, uncomp, tx); - } - - if (ds->ds_prev != NULL) { - if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { - VERIFY3U(0, ==, zap_remove_int(mos, - ds->ds_prev->ds_dir->dd_phys->dd_clones, - ds->ds_object, tx)); - } - dsl_dataset_rele(ds->ds_prev, ds); - ds->ds_prev = ds_prev = NULL; - } - } - - /* - * This must be done after the dsl_traverse(), because it will - * re-open the objset. 
- */ - if (ds->ds_objset) { - dmu_objset_evict(ds->ds_objset); - ds->ds_objset = NULL; - } - - if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { - /* Erase the link in the dir */ - dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); - ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; - ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); - err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); - ASSERT(err == 0); - } else { - /* remove from snapshot namespace */ - dsl_dataset_t *ds_head; - ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0); - VERIFY(0 == dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head)); - VERIFY(0 == dsl_dataset_get_snapname(ds)); -#ifdef ZFS_DEBUG - { - uint64_t val; - - err = dsl_dataset_snap_lookup(ds_head, - ds->ds_snapname, &val); - ASSERT0(err); - ASSERT3U(val, ==, obj); - } -#endif - err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx); - ASSERT(err == 0); - dsl_dataset_rele(ds_head, FTAG); - } - - if (ds_prev && ds->ds_prev != ds_prev) - dsl_dataset_rele(ds_prev, FTAG); - - spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); - - if (ds->ds_phys->ds_next_clones_obj != 0) { - ASSERTV(uint64_t count); - ASSERT(0 == zap_count(mos, - ds->ds_phys->ds_next_clones_obj, &count) && count == 0); - VERIFY(0 == dmu_object_free(mos, - ds->ds_phys->ds_next_clones_obj, tx)); - } - if (ds->ds_phys->ds_props_obj != 0) - VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx)); - if (ds->ds_phys->ds_userrefs_obj != 0) - VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx)); - dsl_dir_close(ds->ds_dir, ds); - ds->ds_dir = NULL; - dsl_dataset_drain_refs(ds, tag); - VERIFY(0 == dmu_object_free(mos, obj, tx)); - - if (dsda->rm_origin) { - /* - * Remove the origin of the clone we just destroyed. - */ - struct dsl_ds_destroyarg ndsda = {0}; - - ndsda.ds = dsda->rm_origin; - dsl_dataset_destroy_sync(&ndsda, tag, tx); - } -} - -static int -dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) -{ - uint64_t asize; - - if (!dmu_tx_is_syncing(tx)) - return (0); - - /* - * If there's an fs-only reservation, any blocks that might become - * owned by the snapshot dataset must be accommodated by space - * outside of the reservation. - */ - ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); - asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); - if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) - return (ENOSPC); - - /* - * Propagate any reserved space for this snapshot to other - * snapshot checks in this sync group. - */ - if (asize > 0) - dsl_dir_willuse_space(ds->ds_dir, asize, tx); - - return (0); -} - -int -dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname, - dmu_tx_t *tx) -{ - int err; - uint64_t value; - - /* - * We don't allow multiple snapshots of the same txg. If there - * is already one, try again. - */ - if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) - return (EAGAIN); - - /* - * Check for conflicting snapshot name. - */ - err = dsl_dataset_snap_lookup(ds, snapname, &value); - if (err == 0) - return (EEXIST); - if (err != ENOENT) - return (err); - - /* - * Check that the dataset's name is not too long. 
Name consists - * of the dataset's length + 1 for the @-sign + snapshot name's length - */ - if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) - return (ENAMETOOLONG); - - err = dsl_dataset_snapshot_reserve_space(ds, tx); - if (err) - return (err); - - ds->ds_trysnap_txg = tx->tx_txg; - return (0); -} - -void -dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname, - dmu_tx_t *tx) -{ - dsl_pool_t *dp = ds->ds_dir->dd_pool; - dmu_buf_t *dbuf; - dsl_dataset_phys_t *dsphys; - uint64_t dsobj, crtxg; - objset_t *mos = dp->dp_meta_objset; - int err; - - ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); - - /* - * The origin's ds_creation_txg has to be < TXG_INITIAL + * The origin's ds_creation_txg has to be < TXG_INITIAL */ if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) crtxg = 1; @@ -2081,7 +1041,7 @@ dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname, dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); - VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); + VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); dsphys = dbuf->db_data; bzero(dsphys, sizeof (dsl_dataset_phys_t)); @@ -2116,9 +1076,9 @@ dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname, ds->ds_prev->ds_phys->ds_creation_txg); ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; } else if (next_clones_obj != 0) { - remove_from_next_clones(ds->ds_prev, + dsl_dataset_remove_from_next_clones(ds->ds_prev, dsphys->ds_next_snap_obj, tx); - VERIFY3U(0, ==, zap_add_int(mos, + VERIFY0(zap_add_int(mos, next_clones_obj, dsobj, tx)); } } @@ -2137,9 +1097,6 @@ dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname, } dmu_buf_will_dirty(ds->ds_dbuf, tx); - zfs_dbgmsg("taking snapshot %s@%s/%llu; newkey=%llu", - ds->ds_dir->dd_myname, snapname, dsobj, - ds->ds_phys->ds_prev_snap_txg); ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx); dsl_deadlist_close(&ds->ds_deadlist); @@ -2154,13 +1111,12 @@ dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname, if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; - err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, - snapname, 8, 1, &dsobj, tx); - ASSERT(err == 0); + VERIFY0(zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, + snapname, 8, 1, &dsobj, tx)); if (ds->ds_prev) - dsl_dataset_drop_ref(ds->ds_prev, ds); - VERIFY(0 == dsl_dataset_get_ref(dp, + dsl_dataset_rele(ds->ds_prev, ds); + VERIFY0(dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); dsl_scan_ds_snapshotted(ds, tx); @@ -2170,88 +1126,264 @@ dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname, spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, ""); } -void -dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) +static void +dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx) { - ASSERT(dmu_tx_is_syncing(tx)); - ASSERT(ds->ds_objset != NULL); - ASSERT(ds->ds_phys->ds_next_snap_obj == 0); - - /* - * in case we had to change ds_fsid_guid when we opened it, - * sync it out now. 
- */ - dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; + dsl_dataset_snapshot_arg_t *ddsa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; - dmu_objset_sync(ds->ds_objset, zio, tx); + for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); + pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { + dsl_dataset_t *ds; + char *name, *atp; + char dsname[MAXNAMELEN]; + + name = nvpair_name(pair); + atp = strchr(name, '@'); + (void) strlcpy(dsname, name, atp - name + 1); + VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds)); + + dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx); + if (ddsa->ddsa_props != NULL) { + dsl_props_set_sync_impl(ds->ds_prev, + ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx); + } + dsl_dataset_rele(ds, FTAG); + } } -static void -get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) +/* + * The snapshots must all be in the same pool. + * All-or-nothing: if there are any failures, nothing will be modified. + */ +int +dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) { - uint64_t count = 0; - objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - zap_cursor_t zc; - zap_attribute_t za; - nvlist_t *propval; - nvlist_t *val; + dsl_dataset_snapshot_arg_t ddsa; + nvpair_t *pair; + boolean_t needsuspend; + int error; + spa_t *spa; + char *firstname; + nvlist_t *suspended = NULL; - rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); - VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0); + pair = nvlist_next_nvpair(snaps, NULL); + if (pair == NULL) + return (0); + firstname = nvpair_name(pair); + + error = spa_open(firstname, &spa, FTAG); + if (error != 0) + return (error); + needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); + spa_close(spa, FTAG); + + if (needsuspend) { + suspended = fnvlist_alloc(); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + char fsname[MAXNAMELEN]; + char *snapname = nvpair_name(pair); + char *atp; + void *cookie; + + atp = strchr(snapname, '@'); + if (atp == NULL) { + error = EINVAL; + break; + } + (void) strlcpy(fsname, snapname, atp - snapname + 1); + + error = zil_suspend(fsname, &cookie); + if (error != 0) + break; + fnvlist_add_uint64(suspended, fsname, + (uintptr_t)cookie); + } + } + + ddsa.ddsa_snaps = snaps; + ddsa.ddsa_props = props; + ddsa.ddsa_errors = errors; + + if (error == 0) { + error = dsl_sync_task(firstname, dsl_dataset_snapshot_check, + dsl_dataset_snapshot_sync, &ddsa, + fnvlist_num_pairs(snaps) * 3); + } + + if (suspended != NULL) { + for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL; + pair = nvlist_next_nvpair(suspended, pair)) { + zil_resume((void *)(uintptr_t) + fnvpair_value_uint64(pair)); + } + fnvlist_free(suspended); + } + + return (error); +} + +typedef struct dsl_dataset_snapshot_tmp_arg { + const char *ddsta_fsname; + const char *ddsta_snapname; + minor_t ddsta_cleanup_minor; + const char *ddsta_htag; +} dsl_dataset_snapshot_tmp_arg_t; + +static int +dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + int error; + + error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds); + if (error != 0) + return (error); + + error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname, tx); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); + return (error); + } + + if (spa_version(dp->dp_spa) < 
SPA_VERSION_USERREFS) { + dsl_dataset_rele(ds, FTAG); + return (ENOTSUP); + } + error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag, + B_TRUE, tx); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); + return (error); + } + + dsl_dataset_rele(ds, FTAG); + return (0); +} + +static void +dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + + VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds)); + + dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx); + dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag, + ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx); + dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx); + + dsl_dataset_rele(ds, FTAG); +} + +int +dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, + minor_t cleanup_minor, const char *htag) +{ + dsl_dataset_snapshot_tmp_arg_t ddsta; + int error; + spa_t *spa; + boolean_t needsuspend; + void *cookie; + + ddsta.ddsta_fsname = fsname; + ddsta.ddsta_snapname = snapname; + ddsta.ddsta_cleanup_minor = cleanup_minor; + ddsta.ddsta_htag = htag; + + error = spa_open(fsname, &spa, FTAG); + if (error != 0) + return (error); + needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); + spa_close(spa, FTAG); + + if (needsuspend) { + error = zil_suspend(fsname, &cookie); + if (error != 0) + return (error); + } + + error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check, + dsl_dataset_snapshot_tmp_sync, &ddsta, 3); + + if (needsuspend) + zil_resume(cookie); + return (error); +} + + +void +dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) +{ + ASSERT(dmu_tx_is_syncing(tx)); + ASSERT(ds->ds_objset != NULL); + ASSERT(ds->ds_phys->ds_next_snap_obj == 0); + + /* + * in case we had to change ds_fsid_guid when we opened it, + * sync it out now. + */ + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; + + dmu_objset_sync(ds->ds_objset, zio, tx); +} + +static void +get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) +{ + uint64_t count = 0; + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + zap_cursor_t zc; + zap_attribute_t za; + nvlist_t *propval = fnvlist_alloc(); + nvlist_t *val = fnvlist_alloc(); + + ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); /* - * There may me missing entries in ds_next_clones_obj + * There may be missing entries in ds_next_clones_obj * due to a bug in a previous version of the code. * Only trust it if it has the right number of entries. */ if (ds->ds_phys->ds_next_clones_obj != 0) { - ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, + ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj, &count)); } - if (count != ds->ds_phys->ds_num_children - 1) { + if (count != ds->ds_phys->ds_num_children - 1) goto fail; - } for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *clone; char buf[ZFS_MAXNAMELEN]; - /* - * Even though we hold the dp_config_rwlock, the dataset - * may fail to open, returning ENOENT. If there is a - * thread concurrently attempting to destroy this - * dataset, it will have the ds_rwlock held for - * RW_WRITER. Our call to dsl_dataset_hold_obj() -> - * dsl_dataset_hold_ref() will fail its - * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the - * dp_config_rwlock, and wait for the destroy progress - * and signal ds_exclusive_cv. 
If the destroy was - * successful, we will see that - * DSL_DATASET_IS_DESTROYED(), and return ENOENT. - */ - if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool, - za.za_first_integer, FTAG, &clone) != 0) - continue; + VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, + za.za_first_integer, FTAG, &clone)); dsl_dir_name(clone->ds_dir, buf); - VERIFY(nvlist_add_boolean(val, buf) == 0); + fnvlist_add_boolean(val, buf); dsl_dataset_rele(clone, FTAG); } zap_cursor_fini(&zc); - VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0); - VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), - propval) == 0); + fnvlist_add_nvlist(propval, ZPROP_VALUE, val); + fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval); fail: nvlist_free(val); nvlist_free(propval); - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); } void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { uint64_t refd, avail, uobjs, aobjs, ratio; + ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool); + + ASSERT(dsl_pool_config_held(dp)); ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : (ds->ds_phys->ds_uncompressed_bytes * 100 / @@ -2297,10 +1429,8 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) dsl_dataset_t *prev; int err; - rw_enter(&dp->dp_config_rwlock, RW_READER); err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); - rw_exit(&dp->dp_config_rwlock); if (err == 0) { err = dsl_dataset_space_written(prev, ds, &written, &comp, &uncomp); @@ -2317,6 +1447,9 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) { + dsl_pool_t *dp = ds->ds_dir->dd_pool; + ASSERT(dsl_pool_config_held(dp)); + stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; stat->dds_guid = ds->ds_phys->ds_guid; @@ -2328,16 +1461,14 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) stat->dds_is_snapshot = B_FALSE; stat->dds_num_clones = 0; - rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); if (dsl_dir_is_clone(ds->ds_dir)) { dsl_dataset_t *ods; - VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, + VERIFY0(dsl_dataset_hold_obj(dp, ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); dsl_dataset_name(ods, stat->dds_origin); - dsl_dataset_drop_ref(ods, FTAG); + dsl_dataset_rele(ods, FTAG); } - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); } } @@ -2375,8 +1506,7 @@ dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) { ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool); - ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || - dsl_pool_sync_context(dp)); + ASSERT(dsl_pool_config_held(dp)); if (ds->ds_prev == NULL) return (B_FALSE); if (ds->ds_phys->ds_bp.blk_birth > @@ -2398,237 +1528,225 @@ dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) return (B_FALSE); } +typedef struct dsl_dataset_rename_snapshot_arg { + const char *ddrsa_fsname; + const char *ddrsa_oldsnapname; + const char *ddrsa_newsnapname; + boolean_t ddrsa_recursive; + dmu_tx_t *ddrsa_tx; +} dsl_dataset_rename_snapshot_arg_t; + /* ARGSUSED */ static int -dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp, + dsl_dataset_t *hds, void *arg) { - dsl_dataset_t *ds = arg1; - char *newsnapname = arg2; - dsl_dir_t *dd = ds->ds_dir; - dsl_dataset_t *hds; + dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; + int error; uint64_t val; - int err; - - err = dsl_dataset_hold_obj(dd->dd_pool, - dd->dd_phys->dd_head_dataset_obj, FTAG, 
&hds); - if (err) - return (err); - /* new name better not be in use */ - err = dsl_dataset_snap_lookup(hds, newsnapname, &val); - dsl_dataset_rele(hds, FTAG); + error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); + if (error != 0) { + /* ignore nonexistent snapshots */ + return (error == ENOENT ? 0 : error); + } - if (err == 0) - err = EEXIST; - else if (err == ENOENT) - err = 0; + /* new name should not exist */ + error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val); + if (error == 0) + error = EEXIST; + else if (error == ENOENT) + error = 0; /* dataset name + 1 for the "@" + the new snapshot name must fit */ - if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) - err = ENAMETOOLONG; + if (dsl_dir_namelen(hds->ds_dir) + 1 + + strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN) + error = ENAMETOOLONG; - return (err); + return (error); } -static void -dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) +static int +dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - const char *newsnapname = arg2; - dsl_dir_t *dd = ds->ds_dir; - objset_t *mos = dd->dd_pool->dp_meta_objset; + dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *hds; - int err; - - ASSERT(ds->ds_phys->ds_next_snap_obj != 0); - - VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, - dd->dd_phys->dd_head_dataset_obj, FTAG, &hds)); + int error; - VERIFY(0 == dsl_dataset_get_snapname(ds)); - err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx); - ASSERT0(err); - mutex_enter(&ds->ds_lock); - (void) strcpy(ds->ds_snapname, newsnapname); - mutex_exit(&ds->ds_lock); - err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, - ds->ds_snapname, 8, 1, &ds->ds_object, tx); - ASSERT0(err); + error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds); + if (error != 0) + return (error); - spa_history_log_internal_ds(ds, "rename", tx, - "-> @%s", newsnapname); + if (ddrsa->ddrsa_recursive) { + error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object, + dsl_dataset_rename_snapshot_check_impl, ddrsa, + DS_FIND_CHILDREN); + } else { + error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa); + } dsl_dataset_rele(hds, FTAG); + return (error); } -struct renamesnaparg { - dsl_sync_task_group_t *dstg; - char failed[MAXPATHLEN]; - char *oldsnap; - char *newsnap; -}; - static int -dsl_snapshot_rename_one(const char *name, void *arg) +dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, + dsl_dataset_t *hds, void *arg) { - struct renamesnaparg *ra = arg; - dsl_dataset_t *ds = NULL; - char *snapname; - int err; - - snapname = kmem_asprintf("%s@%s", name, ra->oldsnap); - (void) strlcpy(ra->failed, snapname, sizeof (ra->failed)); + dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; + dsl_dataset_t *ds; + uint64_t val; + dmu_tx_t *tx = ddrsa->ddrsa_tx; + int error; - /* - * For recursive snapshot renames the parent won't be changing - * so we just pass name for both the to/from argument. - */ - err = zfs_secpolicy_rename_perms(snapname, snapname, CRED()); - if (err != 0) { - strfree(snapname); - return (err == ENOENT ? 0 : err); + error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); + ASSERT(error == 0 || error == ENOENT); + if (error == ENOENT) { + /* ignore nonexistent snapshots */ + return (0); } -#ifdef _KERNEL - /* - * For all filesystems undergoing rename, we'll need to unmount it. 
- */ - (void) zfs_unmount_snap(snapname, NULL); -#endif - err = dsl_dataset_hold(snapname, ra->dstg, &ds); - strfree(snapname); - if (err != 0) - return (err == ENOENT ? 0 : err); + VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds)); + + /* log before we change the name */ + spa_history_log_internal_ds(ds, "rename", tx, + "-> @%s", ddrsa->ddrsa_newsnapname); - dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, - dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); + VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx)); + mutex_enter(&ds->ds_lock); + (void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname); + mutex_exit(&ds->ds_lock); + VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, + ds->ds_snapname, 8, 1, &ds->ds_object, tx)); + dsl_dataset_rele(ds, FTAG); return (0); } -static int -dsl_recursive_rename(char *oldname, const char *newname) +static void +dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx) { - int err; - struct renamesnaparg *ra; - dsl_sync_task_t *dst; - spa_t *spa; - char *cp, *fsname = spa_strdup(oldname); - int len = strlen(oldname) + 1; - - /* truncate the snapshot name to get the fsname */ - cp = strchr(fsname, '@'); - *cp = '\0'; - - err = spa_open(fsname, &spa, FTAG); - if (err) { - kmem_free(fsname, len); - return (err); - } - ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); - ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - - ra->oldsnap = strchr(oldname, '@') + 1; - ra->newsnap = strchr(newname, '@') + 1; - *ra->failed = '\0'; - - err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, - DS_FIND_CHILDREN); - kmem_free(fsname, len); - - if (err == 0) { - err = dsl_sync_task_group_wait(ra->dstg); - } + dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *hds; - for (dst = list_head(&ra->dstg->dstg_tasks); dst; - dst = list_next(&ra->dstg->dstg_tasks, dst)) { - dsl_dataset_t *ds = dst->dst_arg1; - if (dst->dst_err) { - dsl_dir_name(ds->ds_dir, ra->failed); - (void) strlcat(ra->failed, "@", sizeof (ra->failed)); - (void) strlcat(ra->failed, ra->newsnap, - sizeof (ra->failed)); - } - dsl_dataset_rele(ds, ra->dstg); + VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds)); + ddrsa->ddrsa_tx = tx; + if (ddrsa->ddrsa_recursive) { + VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object, + dsl_dataset_rename_snapshot_sync_impl, ddrsa, + DS_FIND_CHILDREN)); + } else { + VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa)); } - - if (err) - (void) strlcpy(oldname, ra->failed, sizeof (ra->failed)); - - dsl_sync_task_group_destroy(ra->dstg); - kmem_free(ra, sizeof (struct renamesnaparg)); - spa_close(spa, FTAG); - return (err); + dsl_dataset_rele(hds, FTAG); } -static int -dsl_valid_rename(const char *oldname, void *arg) +int +dsl_dataset_rename_snapshot(const char *fsname, + const char *oldsnapname, const char *newsnapname, boolean_t recursive) { - int delta = *(int *)arg; + dsl_dataset_rename_snapshot_arg_t ddrsa; - if (strlen(oldname) + delta >= MAXNAMELEN) - return (ENAMETOOLONG); + ddrsa.ddrsa_fsname = fsname; + ddrsa.ddrsa_oldsnapname = oldsnapname; + ddrsa.ddrsa_newsnapname = newsnapname; + ddrsa.ddrsa_recursive = recursive; - return (0); + return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check, + dsl_dataset_rename_snapshot_sync, &ddrsa, 1)); } -#pragma weak dmu_objset_rename = dsl_dataset_rename -int -dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive) +static int 
+dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) { - dsl_dir_t *dd; + const char *fsname = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; - const char *tail; - int err; + int64_t unused_refres_delta; + int error; - err = dsl_dir_open(oldname, FTAG, &dd, &tail); - if (err) - return (err); + error = dsl_dataset_hold(dp, fsname, FTAG, &ds); + if (error != 0) + return (error); - if (tail == NULL) { - int delta = strlen(newname) - strlen(oldname); + /* must not be a snapshot */ + if (dsl_dataset_is_snapshot(ds)) { + dsl_dataset_rele(ds, FTAG); + return (EINVAL); + } - /* if we're growing, validate child name lengths */ - if (delta > 0) - err = dmu_objset_find(oldname, dsl_valid_rename, - &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); + /* must have a most recent snapshot */ + if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { + dsl_dataset_rele(ds, FTAG); + return (EINVAL); + } - if (err == 0) - err = dsl_dir_rename(dd, newname); - dsl_dir_close(dd, FTAG); - return (err); + if (dsl_dataset_long_held(ds)) { + dsl_dataset_rele(ds, FTAG); + return (EBUSY); } - if (tail[0] != '@') { - /* the name ended in a nonexistent component */ - dsl_dir_close(dd, FTAG); - return (ENOENT); + /* + * Check if the snap we are rolling back to uses more than + * the refquota. + */ + if (ds->ds_quota != 0 && + ds->ds_prev->ds_phys->ds_referenced_bytes > ds->ds_quota) { + dsl_dataset_rele(ds, FTAG); + return (EDQUOT); } - dsl_dir_close(dd, FTAG); + /* + * When we do the clone swap, we will temporarily use more space + * due to the refreservation (the head will no longer have any + * unique space, so the entire amount of the refreservation will need + * to be free). We will immediately destroy the clone, freeing + * this space, but the freeing happens over many txg's. 
+ */ + unused_refres_delta = (int64_t)MIN(ds->ds_reserved, + ds->ds_phys->ds_unique_bytes); - /* new name must be snapshot in same filesystem */ - tail = strchr(newname, '@'); - if (tail == NULL) - return (EINVAL); - tail++; - if (strncmp(oldname, newname, tail - newname) != 0) - return (EXDEV); + if (unused_refres_delta > 0 && + unused_refres_delta > + dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) { + dsl_dataset_rele(ds, FTAG); + return (ENOSPC); + } - if (recursive) { - err = dsl_recursive_rename(oldname, newname); - } else { - err = dsl_dataset_hold(oldname, FTAG, &ds); - if (err) - return (err); + dsl_dataset_rele(ds, FTAG); + return (0); +} - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - dsl_dataset_snapshot_rename_check, - dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); +static void +dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) +{ + const char *fsname = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds, *clone; + uint64_t cloneobj; - dsl_dataset_rele(ds, FTAG); - } + VERIFY0(dsl_dataset_hold(dp, fsname, FTAG, &ds)); - return (err); + cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", + ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); + + VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); + + dsl_dataset_clone_swap_sync_impl(clone, ds, tx); + dsl_dataset_zero_zil(ds, tx); + + dsl_destroy_head_sync_impl(clone, tx); + + dsl_dataset_rele(clone, FTAG); + dsl_dataset_rele(ds, FTAG); +} + +int +dsl_dataset_rollback(const char *fsname) +{ + return (dsl_sync_task(fsname, dsl_dataset_rollback_check, + dsl_dataset_rollback_sync, (void *)fsname, 1)); } struct promotenode { @@ -2636,48 +1754,66 @@ struct promotenode { dsl_dataset_t *ds; }; -struct promotearg { +typedef struct dsl_dataset_promote_arg { + const char *ddpa_clonename; + dsl_dataset_t *ddpa_clone; list_t shared_snaps, origin_snaps, clone_snaps; - dsl_dataset_t *origin_origin; + dsl_dataset_t *origin_origin; /* origin of the origin */ uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; char *err_ds; -}; +} dsl_dataset_promote_arg_t; static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); +static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, + void *tag); +static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag); static int -dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *hds = arg1; - struct promotearg *pa = arg2; - struct promotenode *snap = list_head(&pa->shared_snaps); - dsl_dataset_t *origin_ds = snap->ds; + dsl_dataset_promote_arg_t *ddpa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *hds; + struct promotenode *snap; + dsl_dataset_t *origin_ds; int err; uint64_t unused; - /* Check that it is a real clone */ - if (!dsl_dir_is_clone(hds->ds_dir)) - return (EINVAL); + err = promote_hold(ddpa, dp, FTAG); + if (err != 0) + return (err); - /* Since this is so expensive, don't do the preliminary check */ - if (!dmu_tx_is_syncing(tx)) - return (0); + hds = ddpa->ddpa_clone; - if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) + if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { + promote_rele(ddpa, FTAG); return (EXDEV); + } + + /* + * Compute and check the amount of space to transfer. Since this is + * so expensive, don't do the preliminary check. 
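/*
 * Illustrative usage sketch (editor's addition, not part of the patch):
 * the rewritten entry points above take only names and drive
 * dsl_sync_task() themselves, so callers no longer build a sync task
 * group.  Pool and snapshot names below are hypothetical.
 *
 *	int err;
 *
 *	(rename tank/fs@old to tank/fs@new, recursing into children)
 *	err = dsl_dataset_rename_snapshot("tank/fs", "old", "new", B_TRUE);
 *
 *	(roll tank/fs back to its most recent snapshot)
 *	if (err == 0)
 *		err = dsl_dataset_rollback("tank/fs");
 */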
+ */ + if (!dmu_tx_is_syncing(tx)) { + promote_rele(ddpa, FTAG); + return (0); + } + + snap = list_head(&ddpa->shared_snaps); + origin_ds = snap->ds; /* compute origin's new unique space */ - snap = list_tail(&pa->clone_snaps); + snap = list_tail(&ddpa->clone_snaps); ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); dsl_deadlist_space_range(&snap->ds->ds_deadlist, origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, - &pa->unique, &unused, &unused); + &ddpa->unique, &unused, &unused); /* * Walk the snapshots that we are moving * * Compute space to transfer. Consider the incremental changes - * to used for each snapshot: + * to used by each snapshot: * (my used) = (prev's used) + (blocks born) - (blocks killed) * So each snapshot gave birth to: * (blocks born) = (my used) - (prev's used) + (blocks killed) @@ -2688,18 +1824,28 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) * Note however, if we stop before we reach the ORIGIN we get: * uN + kN + kN-1 + ... + kM - uM-1 */ - pa->used = origin_ds->ds_phys->ds_referenced_bytes; - pa->comp = origin_ds->ds_phys->ds_compressed_bytes; - pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; - for (snap = list_head(&pa->shared_snaps); snap; - snap = list_next(&pa->shared_snaps, snap)) { + ddpa->used = origin_ds->ds_phys->ds_referenced_bytes; + ddpa->comp = origin_ds->ds_phys->ds_compressed_bytes; + ddpa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; + for (snap = list_head(&ddpa->shared_snaps); snap; + snap = list_next(&ddpa->shared_snaps, snap)) { uint64_t val, dlused, dlcomp, dluncomp; dsl_dataset_t *ds = snap->ds; + /* + * If there are long holds, we won't be able to evict + * the objset. + */ + if (dsl_dataset_long_held(ds)) { + err = EBUSY; + goto out; + } + /* Check that the snapshot name does not conflict */ - VERIFY(0 == dsl_dataset_get_snapname(ds)); + VERIFY0(dsl_dataset_get_snapname(ds)); err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); if (err == 0) { + (void) strcpy(ddpa->err_ds, snap->ds->ds_snapname); err = EEXIST; goto out; } @@ -2712,26 +1858,27 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); - pa->used += dlused; - pa->comp += dlcomp; - pa->uncomp += dluncomp; + ddpa->used += dlused; + ddpa->comp += dlcomp; + ddpa->uncomp += dluncomp; } /* * If we are a clone of a clone then we never reached ORIGIN, * so we need to subtract out the clone origin's used space. */ - if (pa->origin_origin) { - pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes; - pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; - pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; + if (ddpa->origin_origin) { + ddpa->used -= ddpa->origin_origin->ds_phys->ds_referenced_bytes; + ddpa->comp -= ddpa->origin_origin->ds_phys->ds_compressed_bytes; + ddpa->uncomp -= + ddpa->origin_origin->ds_phys->ds_uncompressed_bytes; } /* Check that there is enough space here */ err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, - pa->used); - if (err) - return (err); + ddpa->used); + if (err != 0) + goto out; /* * Compute the amounts of space that will be used by snapshots @@ -2749,68 +1896,75 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) * calls will be fast because they do not have to * iterate over all bps. 
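/*
 * Worked example (editor's addition, not part of the patch) for the
 * space-transfer identity above: with three moved snapshots whose used
 * bytes are u0=10, u1=14, u2=13 and whose killed bytes are k0=2, k1=3,
 * k2=5, the blocks born are b0 = u0 + k0 = 12, b1 = (u1 - u0) + k1 = 7
 * and b2 = (u2 - u1) + k2 = 4.  Their sum telescopes to
 * u2 + k2 + k1 + k0 = 13 + 5 + 3 + 2 = 23, which is what the check
 * accumulates: the origin's referenced bytes plus each shared
 * snapshot's deadlist space.
 */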
*/ - snap = list_head(&pa->origin_snaps); - err = snaplist_space(&pa->shared_snaps, - snap->ds->ds_dir->dd_origin_txg, &pa->cloneusedsnap); - if (err) - return (err); + snap = list_head(&ddpa->origin_snaps); + err = snaplist_space(&ddpa->shared_snaps, + snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap); + if (err != 0) + goto out; - err = snaplist_space(&pa->clone_snaps, + err = snaplist_space(&ddpa->clone_snaps, snap->ds->ds_dir->dd_origin_txg, &space); - if (err) - return (err); - pa->cloneusedsnap += space; + if (err != 0) + goto out; + ddpa->cloneusedsnap += space; } if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { - err = snaplist_space(&pa->origin_snaps, - origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap); - if (err) - return (err); + err = snaplist_space(&ddpa->origin_snaps, + origin_ds->ds_phys->ds_creation_txg, &ddpa->originusedsnap); + if (err != 0) + goto out; } - return (0); out: - pa->err_ds = snap->ds->ds_snapname; + promote_rele(ddpa, FTAG); return (err); } static void -dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *hds = arg1; - struct promotearg *pa = arg2; - struct promotenode *snap = list_head(&pa->shared_snaps); - dsl_dataset_t *origin_ds = snap->ds; + dsl_dataset_promote_arg_t *ddpa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *hds; + struct promotenode *snap; + dsl_dataset_t *origin_ds; dsl_dataset_t *origin_head; - dsl_dir_t *dd = hds->ds_dir; - dsl_pool_t *dp = hds->ds_dir->dd_pool; + dsl_dir_t *dd; dsl_dir_t *odd = NULL; uint64_t oldnext_obj; int64_t delta; - ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); + VERIFY0(promote_hold(ddpa, dp, FTAG)); + hds = ddpa->ddpa_clone; + + ASSERT0(hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE); + + snap = list_head(&ddpa->shared_snaps); + origin_ds = snap->ds; + dd = hds->ds_dir; - snap = list_head(&pa->origin_snaps); + snap = list_head(&ddpa->origin_snaps); origin_head = snap->ds; /* * We need to explicitly open odd, since origin_ds's dd will be * changing. 
*/ - VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, + VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, NULL, FTAG, &odd)); /* change origin's next snap */ dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; - snap = list_tail(&pa->clone_snaps); + snap = list_tail(&ddpa->clone_snaps); ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; /* change the origin's next clone */ if (origin_ds->ds_phys->ds_next_clones_obj) { - remove_from_next_clones(origin_ds, snap->ds->ds_object, tx); - VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, + dsl_dataset_remove_from_next_clones(origin_ds, + snap->ds->ds_object, tx); + VERIFY0(zap_add_int(dp->dp_meta_objset, origin_ds->ds_phys->ds_next_clones_obj, oldnext_obj, tx)); } @@ -2827,39 +1981,43 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) /* change dd_clone entries */ if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { - VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + VERIFY0(zap_remove_int(dp->dp_meta_objset, odd->dd_phys->dd_clones, hds->ds_object, tx)); - VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, - pa->origin_origin->ds_dir->dd_phys->dd_clones, + VERIFY0(zap_add_int(dp->dp_meta_objset, + ddpa->origin_origin->ds_dir->dd_phys->dd_clones, hds->ds_object, tx)); - VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, - pa->origin_origin->ds_dir->dd_phys->dd_clones, + VERIFY0(zap_remove_int(dp->dp_meta_objset, + ddpa->origin_origin->ds_dir->dd_phys->dd_clones, origin_head->ds_object, tx)); if (dd->dd_phys->dd_clones == 0) { dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } - VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, + VERIFY0(zap_add_int(dp->dp_meta_objset, dd->dd_phys->dd_clones, origin_head->ds_object, tx)); - } /* move snapshots to this dir */ - for (snap = list_head(&pa->shared_snaps); snap; - snap = list_next(&pa->shared_snaps, snap)) { + for (snap = list_head(&ddpa->shared_snaps); snap; + snap = list_next(&ddpa->shared_snaps, snap)) { dsl_dataset_t *ds = snap->ds; - /* unregister props as dsl_dir is changing */ + /* + * Property callbacks are registered to a particular + * dsl_dir. Since ours is changing, evict the objset + * so that they will be unregistered from the old dsl_dir. 
+ */ if (ds->ds_objset) { dmu_objset_evict(ds->ds_objset); ds->ds_objset = NULL; } + /* move snap name entry */ - VERIFY(0 == dsl_dataset_get_snapname(ds)); - VERIFY(0 == dsl_dataset_snap_remove(origin_head, + VERIFY0(dsl_dataset_get_snapname(ds)); + VERIFY0(dsl_dataset_snap_remove(origin_head, ds->ds_snapname, tx)); - VERIFY(0 == zap_add(dp->dp_meta_objset, + VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 8, 1, &ds->ds_object, tx)); @@ -2868,8 +2026,8 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); ds->ds_phys->ds_dir_obj = dd->dd_object; ASSERT3P(ds->ds_dir, ==, odd); - dsl_dir_close(ds->ds_dir, ds); - VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, + dsl_dir_rele(ds->ds_dir, ds); + VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object, NULL, ds, &ds->ds_dir)); /* move any clone references */ @@ -2881,1278 +2039,687 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) for (zap_cursor_init(&zc, dp->dp_meta_objset, ds->ds_phys->ds_next_clones_obj); zap_cursor_retrieve(&zc, &za) == 0; - zap_cursor_advance(&zc)) { - dsl_dataset_t *cnds; - uint64_t o; - - if (za.za_first_integer == oldnext_obj) { - /* - * We've already moved the - * origin's reference. - */ - continue; - } - - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, - za.za_first_integer, FTAG, &cnds)); - o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; - - VERIFY3U(zap_remove_int(dp->dp_meta_objset, - odd->dd_phys->dd_clones, o, tx), ==, 0); - VERIFY3U(zap_add_int(dp->dp_meta_objset, - dd->dd_phys->dd_clones, o, tx), ==, 0); - dsl_dataset_rele(cnds, FTAG); - } - zap_cursor_fini(&zc); - } - - ASSERT0(dsl_prop_numcb(ds)); - } - - /* - * Change space accounting. - * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either - * both be valid, or both be 0 (resulting in delta == 0). This - * is true for each of {clone,origin} independently. - */ - - delta = pa->cloneusedsnap - - dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; - ASSERT3S(delta, >=, 0); - ASSERT3U(pa->used, >=, delta); - dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); - dsl_dir_diduse_space(dd, DD_USED_HEAD, - pa->used - delta, pa->comp, pa->uncomp, tx); - - delta = pa->originusedsnap - - odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; - ASSERT3S(delta, <=, 0); - ASSERT3U(pa->used, >=, -delta); - dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); - dsl_dir_diduse_space(odd, DD_USED_HEAD, - -pa->used - delta, -pa->comp, -pa->uncomp, tx); - - origin_ds->ds_phys->ds_unique_bytes = pa->unique; - - /* log history record */ - spa_history_log_internal_ds(hds, "promote", tx, ""); - - dsl_dir_close(odd, FTAG); -} - -static char *snaplist_tag = "snaplist"; -/* - * Make a list of dsl_dataset_t's for the snapshots between first_obj - * (exclusive) and last_obj (inclusive). The list will be in reverse - * order (last_obj will be the list_head()). If first_obj == 0, do all - * snapshots back to this dataset's origin. 
- */ -static int -snaplist_make(dsl_pool_t *dp, boolean_t own, - uint64_t first_obj, uint64_t last_obj, list_t *l) -{ - uint64_t obj = last_obj; - - ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); - - list_create(l, sizeof (struct promotenode), - offsetof(struct promotenode, link)); - - while (obj != first_obj) { - dsl_dataset_t *ds; - struct promotenode *snap; - int err; - - if (own) { - err = dsl_dataset_own_obj(dp, obj, - 0, snaplist_tag, &ds); - if (err == 0) - dsl_dataset_make_exclusive(ds, snaplist_tag); - } else { - err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); - } - if (err == ENOENT) { - /* lost race with snapshot destroy */ - struct promotenode *last = list_tail(l); - ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); - obj = last->ds->ds_phys->ds_prev_snap_obj; - continue; - } else if (err) { - return (err); - } - - if (first_obj == 0) - first_obj = ds->ds_dir->dd_phys->dd_origin_obj; - - snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP); - snap->ds = ds; - list_insert_tail(l, snap); - obj = ds->ds_phys->ds_prev_snap_obj; - } - - return (0); -} - -static int -snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) -{ - struct promotenode *snap; - - *spacep = 0; - for (snap = list_head(l); snap; snap = list_next(l, snap)) { - uint64_t used, comp, uncomp; - dsl_deadlist_space_range(&snap->ds->ds_deadlist, - mintxg, UINT64_MAX, &used, &comp, &uncomp); - *spacep += used; - } - return (0); -} - -static void -snaplist_destroy(list_t *l, boolean_t own) -{ - struct promotenode *snap; - - if (!l || !list_link_active(&l->list_head)) - return; - - while ((snap = list_tail(l)) != NULL) { - list_remove(l, snap); - if (own) - dsl_dataset_disown(snap->ds, snaplist_tag); - else - dsl_dataset_rele(snap->ds, snaplist_tag); - kmem_free(snap, sizeof (struct promotenode)); - } - list_destroy(l); -} - -/* - * Promote a clone. Nomenclature note: - * "clone" or "cds": the original clone which is being promoted - * "origin" or "ods": the snapshot which is originally clone's origin - * "origin head" or "ohds": the dataset which is the head - * (filesystem/volume) for the origin - * "origin origin": the origin of the origin's filesystem (typically - * NULL, indicating that the clone is not a clone of a clone). - */ -int -dsl_dataset_promote(const char *name, char *conflsnap) -{ - dsl_dataset_t *ds; - dsl_dir_t *dd; - dsl_pool_t *dp; - dmu_object_info_t doi; - struct promotearg pa; - struct promotenode *snap; - int err; - - bzero(&pa, sizeof(struct promotearg)); - err = dsl_dataset_hold(name, FTAG, &ds); - if (err) - return (err); - dd = ds->ds_dir; - dp = dd->dd_pool; - - err = dmu_object_info(dp->dp_meta_objset, - ds->ds_phys->ds_snapnames_zapobj, &doi); - if (err) { - dsl_dataset_rele(ds, FTAG); - return (err); - } - - if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { - dsl_dataset_rele(ds, FTAG); - return (EINVAL); - } - - /* - * We are going to inherit all the snapshots taken before our - * origin (i.e., our new origin will be our parent's origin). - * Take ownership of them so that we can rename them into our - * namespace. 
- */ - rw_enter(&dp->dp_config_rwlock, RW_READER); - - err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, - &pa.shared_snaps); - if (err != 0) - goto out; - - err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); - if (err != 0) - goto out; - - snap = list_head(&pa.shared_snaps); - ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); - err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, - snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); - if (err != 0) - goto out; - - if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { - err = dsl_dataset_hold_obj(dp, - snap->ds->ds_dir->dd_phys->dd_origin_obj, - FTAG, &pa.origin_origin); - if (err != 0) - goto out; - } - -out: - rw_exit(&dp->dp_config_rwlock); - - /* - * Add in 128x the snapnames zapobj size, since we will be moving - * a bunch of snapnames to the promoted ds, and dirtying their - * bonus buffers. - */ - if (err == 0) { - err = dsl_sync_task_do(dp, dsl_dataset_promote_check, - dsl_dataset_promote_sync, ds, &pa, - 2 + 2 * doi.doi_physical_blocks_512); - if (err && pa.err_ds && conflsnap) - (void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN); - } - - snaplist_destroy(&pa.shared_snaps, B_TRUE); - snaplist_destroy(&pa.clone_snaps, B_FALSE); - snaplist_destroy(&pa.origin_snaps, B_FALSE); - if (pa.origin_origin) - dsl_dataset_rele(pa.origin_origin, FTAG); - dsl_dataset_rele(ds, FTAG); - return (err); -} - -struct cloneswaparg { - dsl_dataset_t *cds; /* clone dataset */ - dsl_dataset_t *ohds; /* origin's head dataset */ - boolean_t force; - int64_t unused_refres_delta; /* change in unconsumed refreservation */ -}; - -/* ARGSUSED */ -static int -dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) -{ - struct cloneswaparg *csa = arg1; - - /* they should both be heads */ - if (dsl_dataset_is_snapshot(csa->cds) || - dsl_dataset_is_snapshot(csa->ohds)) - return (EINVAL); - - /* the branch point should be just before them */ - if (csa->cds->ds_prev != csa->ohds->ds_prev) - return (EINVAL); - - /* cds should be the clone (unless they are unrelated) */ - if (csa->cds->ds_prev != NULL && - csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap && - csa->ohds->ds_object != - csa->cds->ds_prev->ds_phys->ds_next_snap_obj) - return (EINVAL); - - /* the clone should be a child of the origin */ - if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) - return (EINVAL); - - /* ohds shouldn't be modified unless 'force' */ - if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) - return (ETXTBSY); - - /* adjust amount of any unconsumed refreservation */ - csa->unused_refres_delta = - (int64_t)MIN(csa->ohds->ds_reserved, - csa->ohds->ds_phys->ds_unique_bytes) - - (int64_t)MIN(csa->ohds->ds_reserved, - csa->cds->ds_phys->ds_unique_bytes); - - if (csa->unused_refres_delta > 0 && - csa->unused_refres_delta > - dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) - return (ENOSPC); - - if (csa->ohds->ds_quota != 0 && - csa->cds->ds_phys->ds_unique_bytes > csa->ohds->ds_quota) - return (EDQUOT); - - return (0); -} - -/* ARGSUSED */ -static void -dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx) -{ - struct cloneswaparg *csa = arg1; - dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; - - ASSERT(csa->cds->ds_reserved == 0); - ASSERT(csa->ohds->ds_quota == 0 || - csa->cds->ds_phys->ds_unique_bytes <= csa->ohds->ds_quota); - - dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); - dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); - - if (csa->cds->ds_objset != NULL) { - 
dmu_objset_evict(csa->cds->ds_objset); - csa->cds->ds_objset = NULL; - } - - if (csa->ohds->ds_objset != NULL) { - dmu_objset_evict(csa->ohds->ds_objset); - csa->ohds->ds_objset = NULL; - } - - /* - * Reset origin's unique bytes, if it exists. - */ - if (csa->cds->ds_prev) { - dsl_dataset_t *origin = csa->cds->ds_prev; - uint64_t comp, uncomp; - - dmu_buf_will_dirty(origin->ds_dbuf, tx); - dsl_deadlist_space_range(&csa->cds->ds_deadlist, - origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, - &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); - } - - /* swap blkptrs */ - { - blkptr_t tmp; - tmp = csa->ohds->ds_phys->ds_bp; - csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; - csa->cds->ds_phys->ds_bp = tmp; - } - - /* set dd_*_bytes */ - { - int64_t dused, dcomp, duncomp; - uint64_t cdl_used, cdl_comp, cdl_uncomp; - uint64_t odl_used, odl_comp, odl_uncomp; - - ASSERT3U(csa->cds->ds_dir->dd_phys-> - dd_used_breakdown[DD_USED_SNAP], ==, 0); - - dsl_deadlist_space(&csa->cds->ds_deadlist, - &cdl_used, &cdl_comp, &cdl_uncomp); - dsl_deadlist_space(&csa->ohds->ds_deadlist, - &odl_used, &odl_comp, &odl_uncomp); - - dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used - - (csa->ohds->ds_phys->ds_referenced_bytes + odl_used); - dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - - (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); - duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + - cdl_uncomp - - (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); - - dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD, - dused, dcomp, duncomp, tx); - dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD, - -dused, -dcomp, -duncomp, tx); - - /* - * The difference in the space used by snapshots is the - * difference in snapshot space due to the head's - * deadlist (since that's the only thing that's - * changing that affects the snapused). - */ - dsl_deadlist_space_range(&csa->cds->ds_deadlist, - csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, - &cdl_used, &cdl_comp, &cdl_uncomp); - dsl_deadlist_space_range(&csa->ohds->ds_deadlist, - csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, - &odl_used, &odl_comp, &odl_uncomp); - dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used, - DD_USED_HEAD, DD_USED_SNAP, tx); - } - - /* swap ds_*_bytes */ - SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes, - csa->cds->ds_phys->ds_referenced_bytes); - SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, - csa->cds->ds_phys->ds_compressed_bytes); - SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, - csa->cds->ds_phys->ds_uncompressed_bytes); - SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, - csa->cds->ds_phys->ds_unique_bytes); - - /* apply any parent delta for change in unconsumed refreservation */ - dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV, - csa->unused_refres_delta, 0, 0, tx); - - /* - * Swap deadlists. - */ - dsl_deadlist_close(&csa->cds->ds_deadlist); - dsl_deadlist_close(&csa->ohds->ds_deadlist); - SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, - csa->cds->ds_phys->ds_deadlist_obj); - dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, - csa->cds->ds_phys->ds_deadlist_obj); - dsl_deadlist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, - csa->ohds->ds_phys->ds_deadlist_obj); - - dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx); - - spa_history_log_internal_ds(csa->cds, "clone swap", tx, - "parent=%s", csa->ohds->ds_dir->dd_myname); -} - -/* - * Swap 'clone' with its origin head datasets. 
Used at the end of "zfs - * recv" into an existing fs to swizzle the file system to the new - * version, and by "zfs rollback". Can also be used to swap two - * independent head datasets if neither has any snapshots. - */ -int -dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, - boolean_t force) -{ - struct cloneswaparg csa; - int error; - - ASSERT(clone->ds_owner); - ASSERT(origin_head->ds_owner); -retry: - /* - * Need exclusive access for the swap. If we're swapping these - * datasets back after an error, we already hold the locks. - */ - if (!RW_WRITE_HELD(&clone->ds_rwlock)) - rw_enter(&clone->ds_rwlock, RW_WRITER); - if (!RW_WRITE_HELD(&origin_head->ds_rwlock) && - !rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) { - rw_exit(&clone->ds_rwlock); - rw_enter(&origin_head->ds_rwlock, RW_WRITER); - if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) { - rw_exit(&origin_head->ds_rwlock); - goto retry; - } - } - csa.cds = clone; - csa.ohds = origin_head; - csa.force = force; - error = dsl_sync_task_do(clone->ds_dir->dd_pool, - dsl_dataset_clone_swap_check, - dsl_dataset_clone_swap_sync, &csa, NULL, 9); - return (error); -} - -/* - * Given a pool name and a dataset object number in that pool, - * return the name of that dataset. - */ -int -dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) -{ - spa_t *spa; - dsl_pool_t *dp; - dsl_dataset_t *ds; - int error; - - if ((error = spa_open(pname, &spa, FTAG)) != 0) - return (error); - dp = spa_get_dsl(spa); - rw_enter(&dp->dp_config_rwlock, RW_READER); - if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) { - dsl_dataset_name(ds, buf); - dsl_dataset_rele(ds, FTAG); - } - rw_exit(&dp->dp_config_rwlock); - spa_close(spa, FTAG); - - return (error); -} + zap_cursor_advance(&zc)) { + dsl_dataset_t *cnds; + uint64_t o; -int -dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, - uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) -{ - int error = 0; + if (za.za_first_integer == oldnext_obj) { + /* + * We've already moved the + * origin's reference. + */ + continue; + } - ASSERT3S(asize, >, 0); + VERIFY0(dsl_dataset_hold_obj(dp, + za.za_first_integer, FTAG, &cnds)); + o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; - /* - * *ref_rsrv is the portion of asize that will come from any - * unconsumed refreservation space. - */ - *ref_rsrv = 0; + VERIFY0(zap_remove_int(dp->dp_meta_objset, + odd->dd_phys->dd_clones, o, tx)); + VERIFY0(zap_add_int(dp->dp_meta_objset, + dd->dd_phys->dd_clones, o, tx)); + dsl_dataset_rele(cnds, FTAG); + } + zap_cursor_fini(&zc); + } - mutex_enter(&ds->ds_lock); - /* - * Make a space adjustment for reserved bytes. - */ - if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { - ASSERT3U(*used, >=, - ds->ds_reserved - ds->ds_phys->ds_unique_bytes); - *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); - *ref_rsrv = - asize - MIN(asize, parent_delta(ds, asize + inflight)); + ASSERT(!dsl_prop_hascb(ds)); } - if (!check_quota || ds->ds_quota == 0) { - mutex_exit(&ds->ds_lock); - return (0); - } /* - * If they are requesting more space, and our current estimate - * is over quota, they get to try again unless the actual - * on-disk is over quota and there are no pending changes (which - * may free up space for us). + * Change space accounting. + * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either + * both be valid, or both be 0 (resulting in delta == 0). This + * is true for each of {clone,origin} independently. 
*/ - if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) { - if (inflight > 0 || - ds->ds_phys->ds_referenced_bytes < ds->ds_quota) - error = ERESTART; - else - error = EDQUOT; - DMU_TX_STAT_BUMP(dmu_tx_quota); - } - mutex_exit(&ds->ds_lock); + delta = ddpa->cloneusedsnap - + dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; + ASSERT3S(delta, >=, 0); + ASSERT3U(ddpa->used, >=, delta); + dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); + dsl_dir_diduse_space(dd, DD_USED_HEAD, + ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx); - return (error); + delta = ddpa->originusedsnap - + odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; + ASSERT3S(delta, <=, 0); + ASSERT3U(ddpa->used, >=, -delta); + dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); + dsl_dir_diduse_space(odd, DD_USED_HEAD, + -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx); + + origin_ds->ds_phys->ds_unique_bytes = ddpa->unique; + + /* log history record */ + spa_history_log_internal_ds(hds, "promote", tx, ""); + + dsl_dir_rele(odd, FTAG); + promote_rele(ddpa, FTAG); } -/* ARGSUSED */ +/* + * Make a list of dsl_dataset_t's for the snapshots between first_obj + * (exclusive) and last_obj (inclusive). The list will be in reverse + * order (last_obj will be the list_head()). If first_obj == 0, do all + * snapshots back to this dataset's origin. + */ static int -dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) +snaplist_make(dsl_pool_t *dp, + uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag) { - dsl_dataset_t *ds = arg1; - dsl_prop_setarg_t *psa = arg2; - int err; + uint64_t obj = last_obj; - if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) - return (ENOTSUP); + list_create(l, sizeof (struct promotenode), + offsetof(struct promotenode, link)); - if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) - return (err); + while (obj != first_obj) { + dsl_dataset_t *ds; + struct promotenode *snap; + int err; - if (psa->psa_effective_value == 0) - return (0); + err = dsl_dataset_hold_obj(dp, obj, tag, &ds); + ASSERT(err != ENOENT); + if (err != 0) + return (err); - if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes || - psa->psa_effective_value < ds->ds_reserved) - return (ENOSPC); + if (first_obj == 0) + first_obj = ds->ds_dir->dd_phys->dd_origin_obj; + + snap = kmem_alloc(sizeof (*snap), KM_SLEEP); + snap->ds = ds; + list_insert_tail(l, snap); + obj = ds->ds_phys->ds_prev_snap_obj; + } return (0); } -extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *); - -void -dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) +static int +snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) { - dsl_dataset_t *ds = arg1; - dsl_prop_setarg_t *psa = arg2; - uint64_t effective_value = psa->psa_effective_value; - - dsl_prop_set_sync(ds, psa, tx); - DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); - - if (ds->ds_quota != effective_value) { - dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_quota = effective_value; + struct promotenode *snap; - spa_history_log_internal_ds(ds, "set refquota", tx, - "refquota=%lld", (longlong_t)ds->ds_quota); + *spacep = 0; + for (snap = list_head(l); snap; snap = list_next(l, snap)) { + uint64_t used, comp, uncomp; + dsl_deadlist_space_range(&snap->ds->ds_deadlist, + mintxg, UINT64_MAX, &used, &comp, &uncomp); + *spacep += used; } + return (0); } -int -dsl_dataset_set_quota(const char *dsname, zprop_source_t source, uint64_t quota) +static void +snaplist_destroy(list_t *l, void *tag) { - dsl_dataset_t *ds; - 
dsl_prop_setarg_t psa; - int err; - - dsl_prop_setarg_init_uint64(&psa, "refquota", source, "a); - - err = dsl_dataset_hold(dsname, FTAG, &ds); - if (err) - return (err); - - /* - * If someone removes a file, then tries to set the quota, we - * want to make sure the file freeing takes effect. - */ - txg_wait_open(ds->ds_dir->dd_pool, 0); + struct promotenode *snap; - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, - ds, &psa, 0); + if (l == NULL || !list_link_active(&l->list_head)) + return; - dsl_dataset_rele(ds, FTAG); - return (err); + while ((snap = list_tail(l)) != NULL) { + list_remove(l, snap); + dsl_dataset_rele(snap->ds, tag); + kmem_free(snap, sizeof (*snap)); + } + list_destroy(l); } static int -dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) +promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag) { - dsl_dataset_t *ds = arg1; - dsl_prop_setarg_t *psa = arg2; - uint64_t effective_value; - uint64_t unique; - int err; + int error; + dsl_dir_t *dd; + struct promotenode *snap; - if (spa_version(ds->ds_dir->dd_pool->dp_spa) < - SPA_VERSION_REFRESERVATION) - return (ENOTSUP); + error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag, + &ddpa->ddpa_clone); + if (error != 0) + return (error); + dd = ddpa->ddpa_clone->ds_dir; - if (dsl_dataset_is_snapshot(ds)) + if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) || + !dsl_dir_is_clone(dd)) { + dsl_dataset_rele(ddpa->ddpa_clone, tag); return (EINVAL); + } - if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) - return (err); - - effective_value = psa->psa_effective_value; - - /* - * If we are doing the preliminary check in open context, the - * space estimates may be inaccurate. - */ - if (!dmu_tx_is_syncing(tx)) - return (0); + error = snaplist_make(dp, 0, dd->dd_phys->dd_origin_obj, + &ddpa->shared_snaps, tag); + if (error != 0) + goto out; - mutex_enter(&ds->ds_lock); - if (!DS_UNIQUE_IS_ACCURATE(ds)) - dsl_dataset_recalc_head_uniq(ds); - unique = ds->ds_phys->ds_unique_bytes; - mutex_exit(&ds->ds_lock); + error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object, + &ddpa->clone_snaps, tag); + if (error != 0) + goto out; - if (MAX(unique, effective_value) > MAX(unique, ds->ds_reserved)) { - uint64_t delta = MAX(unique, effective_value) - - MAX(unique, ds->ds_reserved); + snap = list_head(&ddpa->shared_snaps); + ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); + error = snaplist_make(dp, dd->dd_phys->dd_origin_obj, + snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, + &ddpa->origin_snaps, tag); + if (error != 0) + goto out; - if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) - return (ENOSPC); - if (ds->ds_quota > 0 && - effective_value > ds->ds_quota) - return (ENOSPC); + if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { + error = dsl_dataset_hold_obj(dp, + snap->ds->ds_dir->dd_phys->dd_origin_obj, + tag, &ddpa->origin_origin); + if (error != 0) + goto out; } - - return (0); +out: + if (error != 0) + promote_rele(ddpa, tag); + return (error); } static void -dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) +promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag) { - dsl_dataset_t *ds = arg1; - dsl_prop_setarg_t *psa = arg2; - uint64_t effective_value = psa->psa_effective_value; - uint64_t unique; - int64_t delta; - - dsl_prop_set_sync(ds, psa, tx); - DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); + snaplist_destroy(&ddpa->shared_snaps, tag); + snaplist_destroy(&ddpa->clone_snaps, tag); + 
snaplist_destroy(&ddpa->origin_snaps, tag); + if (ddpa->origin_origin != NULL) + dsl_dataset_rele(ddpa->origin_origin, tag); + dsl_dataset_rele(ddpa->ddpa_clone, tag); +} - dmu_buf_will_dirty(ds->ds_dbuf, tx); +/* + * Promote a clone. + * + * If it fails due to a conflicting snapshot name, "conflsnap" will be filled + * in with the name. (It must be at least MAXNAMELEN bytes long.) + */ +int +dsl_dataset_promote(const char *name, char *conflsnap) +{ + dsl_dataset_promote_arg_t ddpa = { 0 }; + uint64_t numsnaps; + int error; + objset_t *os; - mutex_enter(&ds->ds_dir->dd_lock); - mutex_enter(&ds->ds_lock); - ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); - unique = ds->ds_phys->ds_unique_bytes; - delta = MAX(0, (int64_t)(effective_value - unique)) - - MAX(0, (int64_t)(ds->ds_reserved - unique)); - ds->ds_reserved = effective_value; - mutex_exit(&ds->ds_lock); + /* + * We will modify space proportional to the number of + * snapshots. Compute numsnaps. + */ + error = dmu_objset_hold(name, FTAG, &os); + if (error != 0) + return (error); + error = zap_count(dmu_objset_pool(os)->dp_meta_objset, + dmu_objset_ds(os)->ds_phys->ds_snapnames_zapobj, &numsnaps); + dmu_objset_rele(os, FTAG); + if (error != 0) + return (error); - dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); - mutex_exit(&ds->ds_dir->dd_lock); + ddpa.ddpa_clonename = name; + ddpa.err_ds = conflsnap; - spa_history_log_internal_ds(ds, "set refreservation", tx, - "refreservation=%lld", (longlong_t)effective_value); + return (dsl_sync_task(name, dsl_dataset_promote_check, + dsl_dataset_promote_sync, &ddpa, 2 + numsnaps)); } int -dsl_dataset_set_reservation(const char *dsname, zprop_source_t source, - uint64_t reservation) +dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, + dsl_dataset_t *origin_head, boolean_t force) { - dsl_dataset_t *ds; - dsl_prop_setarg_t psa; - int err; + int64_t unused_refres_delta; - dsl_prop_setarg_init_uint64(&psa, "refreservation", source, - &reservation); + /* they should both be heads */ + if (dsl_dataset_is_snapshot(clone) || + dsl_dataset_is_snapshot(origin_head)) + return (EINVAL); - err = dsl_dataset_hold(dsname, FTAG, &ds); - if (err) - return (err); + /* the branch point should be just before them */ + if (clone->ds_prev != origin_head->ds_prev) + return (EINVAL); - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - dsl_dataset_set_reservation_check, - dsl_dataset_set_reservation_sync, ds, &psa, 0); + /* clone should be the clone (unless they are unrelated) */ + if (clone->ds_prev != NULL && + clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap && + origin_head->ds_object != + clone->ds_prev->ds_phys->ds_next_snap_obj) + return (EINVAL); - dsl_dataset_rele(ds, FTAG); - return (err); -} + /* the clone should be a child of the origin */ + if (clone->ds_dir->dd_parent != origin_head->ds_dir) + return (EINVAL); -typedef struct zfs_hold_cleanup_arg { - dsl_pool_t *dp; - uint64_t dsobj; - char htag[MAXNAMELEN]; -} zfs_hold_cleanup_arg_t; + /* origin_head shouldn't be modified unless 'force' */ + if (!force && dsl_dataset_modified_since_lastsnap(origin_head)) + return (ETXTBSY); -static void -dsl_dataset_user_release_onexit(void *arg) -{ - zfs_hold_cleanup_arg_t *ca = arg; + /* origin_head should have no long holds (e.g. 
is not mounted) */ + if (dsl_dataset_long_held(origin_head)) + return (EBUSY); + + /* check amount of any unconsumed refreservation */ + unused_refres_delta = + (int64_t)MIN(origin_head->ds_reserved, + origin_head->ds_phys->ds_unique_bytes) - + (int64_t)MIN(origin_head->ds_reserved, + clone->ds_phys->ds_unique_bytes); + + if (unused_refres_delta > 0 && + unused_refres_delta > + dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE)) + return (ENOSPC); + + /* clone can't be over the head's refquota */ + if (origin_head->ds_quota != 0 && + clone->ds_phys->ds_referenced_bytes > origin_head->ds_quota) + return (EDQUOT); - (void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag, - B_TRUE); - kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); + return (0); } void -dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, - minor_t minor) +dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, + dsl_dataset_t *origin_head, dmu_tx_t *tx) { - zfs_hold_cleanup_arg_t *ca; - - ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP); - ca->dp = ds->ds_dir->dd_pool; - ca->dsobj = ds->ds_object; - (void) strlcpy(ca->htag, htag, sizeof (ca->htag)); - VERIFY3U(0, ==, zfs_onexit_add_cb(minor, - dsl_dataset_user_release_onexit, ca, NULL)); -} + dsl_pool_t *dp = dmu_tx_pool(tx); + int64_t unused_refres_delta; -/* - * If you add new checks here, you may need to add - * additional checks to the "temporary" case in - * snapshot_check() in dmu_objset.c. - */ -static int -dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; - struct dsl_ds_holdarg *ha = arg2; - const char *htag = ha->htag; - objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - int error = 0; + ASSERT(clone->ds_reserved == 0); + ASSERT(origin_head->ds_quota == 0 || + clone->ds_phys->ds_unique_bytes <= origin_head->ds_quota); - if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) - return (ENOTSUP); + dmu_buf_will_dirty(clone->ds_dbuf, tx); + dmu_buf_will_dirty(origin_head->ds_dbuf, tx); - if (!dsl_dataset_is_snapshot(ds)) - return (EINVAL); + if (clone->ds_objset != NULL) { + dmu_objset_evict(clone->ds_objset); + clone->ds_objset = NULL; + } - /* tags must be unique */ - mutex_enter(&ds->ds_lock); - if (ds->ds_phys->ds_userrefs_obj) { - error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag, - 8, 1, tx); - if (error == 0) - error = EEXIST; - else if (error == ENOENT) - error = 0; + if (origin_head->ds_objset != NULL) { + dmu_objset_evict(origin_head->ds_objset); + origin_head->ds_objset = NULL; } - mutex_exit(&ds->ds_lock); - if (error == 0 && ha->temphold && - strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) - error = E2BIG; + unused_refres_delta = + (int64_t)MIN(origin_head->ds_reserved, + origin_head->ds_phys->ds_unique_bytes) - + (int64_t)MIN(origin_head->ds_reserved, + clone->ds_phys->ds_unique_bytes); + + /* + * Reset origin's unique bytes, if it exists. 
+ */ + if (clone->ds_prev) { + dsl_dataset_t *origin = clone->ds_prev; + uint64_t comp, uncomp; + + dmu_buf_will_dirty(origin->ds_dbuf, tx); + dsl_deadlist_space_range(&clone->ds_deadlist, + origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, + &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); + } + + /* swap blkptrs */ + { + blkptr_t tmp; + tmp = origin_head->ds_phys->ds_bp; + origin_head->ds_phys->ds_bp = clone->ds_phys->ds_bp; + clone->ds_phys->ds_bp = tmp; + } + + /* set dd_*_bytes */ + { + int64_t dused, dcomp, duncomp; + uint64_t cdl_used, cdl_comp, cdl_uncomp; + uint64_t odl_used, odl_comp, odl_uncomp; + + ASSERT3U(clone->ds_dir->dd_phys-> + dd_used_breakdown[DD_USED_SNAP], ==, 0); + + dsl_deadlist_space(&clone->ds_deadlist, + &cdl_used, &cdl_comp, &cdl_uncomp); + dsl_deadlist_space(&origin_head->ds_deadlist, + &odl_used, &odl_comp, &odl_uncomp); - return (error); -} + dused = clone->ds_phys->ds_referenced_bytes + cdl_used - + (origin_head->ds_phys->ds_referenced_bytes + odl_used); + dcomp = clone->ds_phys->ds_compressed_bytes + cdl_comp - + (origin_head->ds_phys->ds_compressed_bytes + odl_comp); + duncomp = clone->ds_phys->ds_uncompressed_bytes + + cdl_uncomp - + (origin_head->ds_phys->ds_uncompressed_bytes + odl_uncomp); -void -dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dataset_t *ds = arg1; - struct dsl_ds_holdarg *ha = arg2; - const char *htag = ha->htag; - dsl_pool_t *dp = ds->ds_dir->dd_pool; - objset_t *mos = dp->dp_meta_objset; - uint64_t now = gethrestime_sec(); - uint64_t zapobj; + dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD, + dused, dcomp, duncomp, tx); + dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD, + -dused, -dcomp, -duncomp, tx); - mutex_enter(&ds->ds_lock); - if (ds->ds_phys->ds_userrefs_obj == 0) { /* - * This is the first user hold for this dataset. Create - * the userrefs zap object. + * The difference in the space used by snapshots is the + * difference in snapshot space due to the head's + * deadlist (since that's the only thing that's + * changing that affects the snapused). 
*/ - dmu_buf_will_dirty(ds->ds_dbuf, tx); - zapobj = ds->ds_phys->ds_userrefs_obj = - zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx); - } else { - zapobj = ds->ds_phys->ds_userrefs_obj; + dsl_deadlist_space_range(&clone->ds_deadlist, + origin_head->ds_dir->dd_origin_txg, UINT64_MAX, + &cdl_used, &cdl_comp, &cdl_uncomp); + dsl_deadlist_space_range(&origin_head->ds_deadlist, + origin_head->ds_dir->dd_origin_txg, UINT64_MAX, + &odl_used, &odl_comp, &odl_uncomp); + dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used, + DD_USED_HEAD, DD_USED_SNAP, tx); } - ds->ds_userrefs++; - mutex_exit(&ds->ds_lock); - VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx)); + /* swap ds_*_bytes */ + SWITCH64(origin_head->ds_phys->ds_referenced_bytes, + clone->ds_phys->ds_referenced_bytes); + SWITCH64(origin_head->ds_phys->ds_compressed_bytes, + clone->ds_phys->ds_compressed_bytes); + SWITCH64(origin_head->ds_phys->ds_uncompressed_bytes, + clone->ds_phys->ds_uncompressed_bytes); + SWITCH64(origin_head->ds_phys->ds_unique_bytes, + clone->ds_phys->ds_unique_bytes); - if (ha->temphold) { - VERIFY(0 == dsl_pool_user_hold(dp, ds->ds_object, - htag, &now, tx)); - } + /* apply any parent delta for change in unconsumed refreservation */ + dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV, + unused_refres_delta, 0, 0, tx); - spa_history_log_internal_ds(ds, "hold", tx, - "tag = %s temp = %d holds now = %llu", - htag, (int)ha->temphold, ds->ds_userrefs); -} + /* + * Swap deadlists. + */ + dsl_deadlist_close(&clone->ds_deadlist); + dsl_deadlist_close(&origin_head->ds_deadlist); + SWITCH64(origin_head->ds_phys->ds_deadlist_obj, + clone->ds_phys->ds_deadlist_obj); + dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, + clone->ds_phys->ds_deadlist_obj); + dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, + origin_head->ds_phys->ds_deadlist_obj); -static int -dsl_dataset_user_hold_one(const char *dsname, void *arg) -{ - struct dsl_ds_holdarg *ha = arg; - dsl_dataset_t *ds; - int error; - char *name; + dsl_scan_ds_clone_swapped(origin_head, clone, tx); - /* alloc a buffer to hold dsname@snapname plus terminating NULL */ - name = kmem_asprintf("%s@%s", dsname, ha->snapname); - error = dsl_dataset_hold(name, ha->dstg, &ds); - strfree(name); - if (error == 0) { - ha->gotone = B_TRUE; - dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check, - dsl_dataset_user_hold_sync, ds, ha, 0); - } else if (error == ENOENT && ha->recursive) { - error = 0; - } else { - (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); - } - return (error); + spa_history_log_internal_ds(clone, "clone swap", tx, + "parent=%s", origin_head->ds_dir->dd_myname); } +/* + * Given a pool name and a dataset object number in that pool, + * return the name of that dataset. 
+ */ int -dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, - boolean_t temphold) +dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) { - struct dsl_ds_holdarg *ha; + dsl_pool_t *dp; + dsl_dataset_t *ds; int error; - ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); - ha->htag = htag; - ha->temphold = temphold; - error = dsl_sync_task_do(ds->ds_dir->dd_pool, - dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync, - ds, ha, 0); - kmem_free(ha, sizeof (struct dsl_ds_holdarg)); + error = dsl_pool_hold(pname, FTAG, &dp); + if (error != 0) + return (error); + + error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); + if (error == 0) { + dsl_dataset_name(ds, buf); + dsl_dataset_rele(ds, FTAG); + } + dsl_pool_rele(dp, FTAG); return (error); } int -dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, - boolean_t recursive, boolean_t temphold, int cleanup_fd) +dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, + uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) { - struct dsl_ds_holdarg *ha; - dsl_sync_task_t *dst; - spa_t *spa; - int error; - minor_t minor = 0; - - if (cleanup_fd != -1) { - /* Currently we only support cleanup-on-exit of tempholds. */ - if (!temphold) - return (EINVAL); - error = zfs_onexit_fd_hold(cleanup_fd, &minor); - if (error) - return (error); - } + int error = 0; - ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); + ASSERT3S(asize, >, 0); - (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); + /* + * *ref_rsrv is the portion of asize that will come from any + * unconsumed refreservation space. + */ + *ref_rsrv = 0; - error = spa_open(dsname, &spa, FTAG); - if (error) { - kmem_free(ha, sizeof (struct dsl_ds_holdarg)); - if (cleanup_fd != -1) - zfs_onexit_fd_rele(cleanup_fd); - return (error); + mutex_enter(&ds->ds_lock); + /* + * Make a space adjustment for reserved bytes. + */ + if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { + ASSERT3U(*used, >=, + ds->ds_reserved - ds->ds_phys->ds_unique_bytes); + *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); + *ref_rsrv = + asize - MIN(asize, parent_delta(ds, asize + inflight)); } - ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - ha->htag = htag; - ha->snapname = snapname; - ha->recursive = recursive; - ha->temphold = temphold; - - if (recursive) { - error = dmu_objset_find(dsname, dsl_dataset_user_hold_one, - ha, DS_FIND_CHILDREN); - } else { - error = dsl_dataset_user_hold_one(dsname, ha); + if (!check_quota || ds->ds_quota == 0) { + mutex_exit(&ds->ds_lock); + return (0); } - if (error == 0) - error = dsl_sync_task_group_wait(ha->dstg); - - for (dst = list_head(&ha->dstg->dstg_tasks); dst; - dst = list_next(&ha->dstg->dstg_tasks, dst)) { - dsl_dataset_t *ds = dst->dst_arg1; - - if (dst->dst_err) { - dsl_dataset_name(ds, ha->failed); - *strchr(ha->failed, '@') = '\0'; - } else if (error == 0 && minor != 0 && temphold) { - /* - * If this hold is to be released upon process exit, - * register that action now. - */ - dsl_register_onexit_hold_cleanup(ds, htag, minor); - } - dsl_dataset_rele(ds, ha->dstg); + /* + * If they are requesting more space, and our current estimate + * is over quota, they get to try again unless the actual + * on-disk is over quota and there are no pending changes (which + * may free up space for us). 
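/*
 * Illustrative usage sketch (editor's addition, not part of the patch)
 * for the promote and object-to-name entry points shown earlier in this
 * hunk.  "tank/clone" and dsobj are hypothetical; both buffers are
 * assumed to be at least MAXNAMELEN bytes, as the promote comment
 * requires for conflsnap.
 *
 *	char conflsnap[MAXNAMELEN] = { 0 };
 *	char dsname[MAXNAMELEN];
 *	int err;
 *
 *	err = dsl_dataset_promote("tank/clone", conflsnap);
 *	(on EEXIST, conflsnap holds the conflicting snapshot name)
 *
 *	err = dsl_dsobj_to_dsname("tank", dsobj, dsname);
 *	(on success, dsname holds the dataset name for object dsobj)
 */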
+ */ + if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) { + if (inflight > 0 || + ds->ds_phys->ds_referenced_bytes < ds->ds_quota) + error = ERESTART; + else + error = EDQUOT; } + mutex_exit(&ds->ds_lock); - if (error == 0 && recursive && !ha->gotone) - error = ENOENT; - - if (error) - (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); - - dsl_sync_task_group_destroy(ha->dstg); - - kmem_free(ha, sizeof (struct dsl_ds_holdarg)); - spa_close(spa, FTAG); - if (cleanup_fd != -1) - zfs_onexit_fd_rele(cleanup_fd); return (error); } -struct dsl_ds_releasearg { - dsl_dataset_t *ds; - const char *htag; - boolean_t own; /* do we own or just hold ds? */ -}; +typedef struct dsl_dataset_set_qr_arg { + const char *ddsqra_name; + zprop_source_t ddsqra_source; + uint64_t ddsqra_value; +} dsl_dataset_set_qr_arg_t; + +/* ARGSUSED */ static int -dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag, - boolean_t *might_destroy) +dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx) { - objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - uint64_t zapobj; - uint64_t tmp; + dsl_dataset_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; int error; + uint64_t newval; - *might_destroy = B_FALSE; + if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA) + return (ENOTSUP); - mutex_enter(&ds->ds_lock); - zapobj = ds->ds_phys->ds_userrefs_obj; - if (zapobj == 0) { - /* The tag can't possibly exist */ - mutex_exit(&ds->ds_lock); - return (ESRCH); + error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); + if (error != 0) + return (error); + + if (dsl_dataset_is_snapshot(ds)) { + dsl_dataset_rele(ds, FTAG); + return (EINVAL); } - /* Make sure the tag exists */ - error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp); - if (error) { - mutex_exit(&ds->ds_lock); - if (error == ENOENT) - error = ESRCH; + error = dsl_prop_predict(ds->ds_dir, + zfs_prop_to_name(ZFS_PROP_REFQUOTA), + ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); return (error); } - if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 && - DS_IS_DEFER_DESTROY(ds)) - *might_destroy = B_TRUE; + if (newval == 0) { + dsl_dataset_rele(ds, FTAG); + return (0); + } + + if (newval < ds->ds_phys->ds_referenced_bytes || + newval < ds->ds_reserved) { + dsl_dataset_rele(ds, FTAG); + return (ENOSPC); + } - mutex_exit(&ds->ds_lock); + dsl_dataset_rele(ds, FTAG); return (0); } -static int -dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx) +static void +dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx) { - struct dsl_ds_releasearg *ra = arg1; - dsl_dataset_t *ds = ra->ds; - boolean_t might_destroy; - int error; + dsl_dataset_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + uint64_t newval; - if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) - return (ENOTSUP); + VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); - error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy); - if (error) - return (error); + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_REFQUOTA), + ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, + &ddsqra->ddsqra_value, tx); - if (might_destroy) { - struct dsl_ds_destroyarg dsda = {0}; + VERIFY0(dsl_prop_get_int_ds(ds, + zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval)); - if (dmu_tx_is_syncing(tx)) { - /* - * If we're not prepared to remove the snapshot, - * we can't allow the release to happen right 
now. - */ - if (!ra->own) - return (EBUSY); - } - dsda.ds = ds; - dsda.releasing = B_TRUE; - return (dsl_dataset_destroy_check(&dsda, tag, tx)); + if (ds->ds_quota != newval) { + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_quota = newval; } - - return (0); + dsl_dataset_rele(ds, FTAG); } -static void -dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) +int +dsl_dataset_set_refquota(const char *dsname, zprop_source_t source, + uint64_t refquota) { - struct dsl_ds_releasearg *ra = arg1; - dsl_dataset_t *ds = ra->ds; - dsl_pool_t *dp = ds->ds_dir->dd_pool; - objset_t *mos = dp->dp_meta_objset; - uint64_t zapobj; - uint64_t refs; - int error; + dsl_dataset_set_qr_arg_t ddsqra; - mutex_enter(&ds->ds_lock); - ds->ds_userrefs--; - refs = ds->ds_userrefs; - mutex_exit(&ds->ds_lock); - error = dsl_pool_user_release(dp, ds->ds_object, ra->htag, tx); - VERIFY(error == 0 || error == ENOENT); - zapobj = ds->ds_phys->ds_userrefs_obj; - VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx)); - if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 && - DS_IS_DEFER_DESTROY(ds)) { - struct dsl_ds_destroyarg dsda = {0}; - - ASSERT(ra->own); - dsda.ds = ds; - dsda.releasing = B_TRUE; - /* We already did the destroy_check */ - dsl_dataset_destroy_sync(&dsda, tag, tx); - } + ddsqra.ddsqra_name = dsname; + ddsqra.ddsqra_source = source; + ddsqra.ddsqra_value = refquota; + + return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check, + dsl_dataset_set_refquota_sync, &ddsqra, 0)); } static int -dsl_dataset_user_release_one(const char *dsname, void *arg) +dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx) { - struct dsl_ds_holdarg *ha = arg; - struct dsl_ds_releasearg *ra; + dsl_dataset_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; int error; - void *dtag = ha->dstg; - char *name; - boolean_t own = B_FALSE; - boolean_t might_destroy; - - /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */ - name = kmem_asprintf("%s@%s", dsname, ha->snapname); - error = dsl_dataset_hold(name, dtag, &ds); - strfree(name); - if (error == ENOENT && ha->recursive) - return (0); - (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); - if (error) - return (error); - - ha->gotone = B_TRUE; + uint64_t newval, unique; - ASSERT(dsl_dataset_is_snapshot(ds)); + if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION) + return (ENOTSUP); - error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy); - if (error) { - dsl_dataset_rele(ds, dtag); + error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); + if (error != 0) return (error); - } - if (might_destroy) { -#ifdef _KERNEL - name = kmem_asprintf("%s@%s", dsname, ha->snapname); - error = zfs_unmount_snap(name, NULL); - strfree(name); - if (error) { - dsl_dataset_rele(ds, dtag); - return (error); - } -#endif - if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) { - dsl_dataset_rele(ds, dtag); - return (EBUSY); - } else { - own = B_TRUE; - dsl_dataset_make_exclusive(ds, dtag); - } + if (dsl_dataset_is_snapshot(ds)) { + dsl_dataset_rele(ds, FTAG); + return (EINVAL); } - ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP); - ra->ds = ds; - ra->htag = ha->htag; - ra->own = own; - dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check, - dsl_dataset_user_release_sync, ra, dtag, 0); - - return (0); -} - -int -dsl_dataset_user_release(char *dsname, char *snapname, char *htag, - boolean_t recursive) -{ - struct dsl_ds_holdarg *ha; - dsl_sync_task_t *dst; - spa_t *spa; - int error; - 
-top: - ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); - - (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); - - error = spa_open(dsname, &spa, FTAG); - if (error) { - kmem_free(ha, sizeof (struct dsl_ds_holdarg)); + error = dsl_prop_predict(ds->ds_dir, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), + ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); return (error); } - ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); - ha->htag = htag; - ha->snapname = snapname; - ha->recursive = recursive; - if (recursive) { - error = dmu_objset_find(dsname, dsl_dataset_user_release_one, - ha, DS_FIND_CHILDREN); - } else { - error = dsl_dataset_user_release_one(dsname, ha); + /* + * If we are doing the preliminary check in open context, the + * space estimates may be inaccurate. + */ + if (!dmu_tx_is_syncing(tx)) { + dsl_dataset_rele(ds, FTAG); + return (0); } - if (error == 0) - error = dsl_sync_task_group_wait(ha->dstg); - for (dst = list_head(&ha->dstg->dstg_tasks); dst; - dst = list_next(&ha->dstg->dstg_tasks, dst)) { - struct dsl_ds_releasearg *ra = dst->dst_arg1; - dsl_dataset_t *ds = ra->ds; - - if (dst->dst_err) - dsl_dataset_name(ds, ha->failed); + mutex_enter(&ds->ds_lock); + if (!DS_UNIQUE_IS_ACCURATE(ds)) + dsl_dataset_recalc_head_uniq(ds); + unique = ds->ds_phys->ds_unique_bytes; + mutex_exit(&ds->ds_lock); - if (ra->own) - dsl_dataset_disown(ds, ha->dstg); - else - dsl_dataset_rele(ds, ha->dstg); + if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) { + uint64_t delta = MAX(unique, newval) - + MAX(unique, ds->ds_reserved); - kmem_free(ra, sizeof (struct dsl_ds_releasearg)); + if (delta > + dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) || + (ds->ds_quota > 0 && newval > ds->ds_quota)) { + dsl_dataset_rele(ds, FTAG); + return (ENOSPC); + } } - if (error == 0 && recursive && !ha->gotone) - error = ENOENT; - - if (error && error != EBUSY) - (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); - - dsl_sync_task_group_destroy(ha->dstg); - kmem_free(ha, sizeof (struct dsl_ds_holdarg)); - spa_close(spa, FTAG); - - /* - * We can get EBUSY if we were racing with deferred destroy and - * dsl_dataset_user_release_check() hadn't done the necessary - * open context setup. We can also get EBUSY if we're racing - * with destroy and that thread is the ds_owner. Either way - * the busy condition should be transient, and we should retry - * the release operation. - */ - if (error == EBUSY) - goto top; - - return (error); + dsl_dataset_rele(ds, FTAG); + return (0); } -/* - * Called at spa_load time (with retry == B_FALSE) to release a stale - * temporary user hold. Also called by the onexit code (with retry == B_TRUE). 
- */ -int -dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag, - boolean_t retry) +void +dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, + zprop_source_t source, uint64_t value, dmu_tx_t *tx) { - dsl_dataset_t *ds; - char *snap; - char *name; - int namelen; - int error; + uint64_t newval; + uint64_t unique; + int64_t delta; - do { - rw_enter(&dp->dp_config_rwlock, RW_READER); - error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); - rw_exit(&dp->dp_config_rwlock); - if (error) - return (error); - namelen = dsl_dataset_namelen(ds)+1; - name = kmem_alloc(namelen, KM_SLEEP); - dsl_dataset_name(ds, name); - dsl_dataset_rele(ds, FTAG); + dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), + source, sizeof (value), 1, &value, tx); - snap = strchr(name, '@'); - *snap = '\0'; - ++snap; - error = dsl_dataset_user_release(name, snap, htag, B_FALSE); - kmem_free(name, namelen); + VERIFY0(dsl_prop_get_int_ds(ds, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval)); - /* - * The object can't have been destroyed because we have a hold, - * but it might have been renamed, resulting in ENOENT. Retry - * if we've been requested to do so. - * - * It would be nice if we could use the dsobj all the way - * through and avoid ENOENT entirely. But we might need to - * unmount the snapshot, and there's currently no way to lookup - * a vfsp using a ZFS object id. - */ - } while ((error == ENOENT) && retry); + dmu_buf_will_dirty(ds->ds_dbuf, tx); + mutex_enter(&ds->ds_dir->dd_lock); + mutex_enter(&ds->ds_lock); + ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); + unique = ds->ds_phys->ds_unique_bytes; + delta = MAX(0, (int64_t)(newval - unique)) - + MAX(0, (int64_t)(ds->ds_reserved - unique)); + ds->ds_reserved = newval; + mutex_exit(&ds->ds_lock); - return (error); + dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); + mutex_exit(&ds->ds_dir->dd_lock); } -int -dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp) +static void +dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx) { + dsl_dataset_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; - int err; - - err = dsl_dataset_hold(dsname, FTAG, &ds); - if (err) - return (err); - VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)); - if (ds->ds_phys->ds_userrefs_obj != 0) { - zap_attribute_t *za; - zap_cursor_t zc; - - za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); - for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, - ds->ds_phys->ds_userrefs_obj); - zap_cursor_retrieve(&zc, za) == 0; - zap_cursor_advance(&zc)) { - VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name, - za->za_first_integer)); - } - zap_cursor_fini(&zc); - kmem_free(za, sizeof (zap_attribute_t)); - } + VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); + dsl_dataset_set_refreservation_sync_impl(ds, + ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx); dsl_dataset_rele(ds, FTAG); - return (0); } -/* - * Note, this function is used as the callback for dmu_objset_find(). We - * always return 0 so that we will continue to find and process - * inconsistent datasets, even if we encounter an error trying to - * process one of them. 
- */ -/* ARGSUSED */ int -dsl_destroy_inconsistent(const char *dsname, void *arg) +dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, + uint64_t refreservation) { - dsl_dataset_t *ds; + dsl_dataset_set_qr_arg_t ddsqra; - if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) { - if (DS_IS_INCONSISTENT(ds)) - (void) dsl_dataset_destroy(ds, FTAG, B_FALSE); - else - dsl_dataset_disown(ds, FTAG); - } - return (0); -} + ddsqra.ddsqra_name = dsname; + ddsqra.ddsqra_source = source; + ddsqra.ddsqra_value = refreservation; + return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check, + dsl_dataset_set_refreservation_sync, &ddsqra, 0)); +} /* * Return (in *usedp) the amount of space written in new that is not @@ -4179,6 +2746,8 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, uint64_t snapobj; dsl_pool_t *dp = new->ds_dir->dd_pool; + ASSERT(dsl_pool_config_held(dp)); + *usedp = 0; *usedp += new->ds_phys->ds_referenced_bytes; *usedp -= oldsnap->ds_phys->ds_referenced_bytes; @@ -4191,7 +2760,6 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, *uncompp += new->ds_phys->ds_uncompressed_bytes; *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; - rw_enter(&dp->dp_config_rwlock, RW_READER); snapobj = new->ds_object; while (snapobj != oldsnap->ds_object) { dsl_dataset_t *snap; @@ -4240,7 +2808,6 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, } } - rw_exit(&dp->dp_config_rwlock); return (err); } @@ -4282,7 +2849,6 @@ dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, *usedp = *compp = *uncompp = 0; - rw_enter(&dp->dp_config_rwlock, RW_READER); snapobj = lastsnap->ds_phys->ds_next_snap_obj; while (snapobj != firstsnap->ds_object) { dsl_dataset_t *ds; @@ -4303,12 +2869,47 @@ dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, ASSERT3U(snapobj, !=, 0); dsl_dataset_rele(ds, FTAG); } - rw_exit(&dp->dp_config_rwlock); return (err); } +/* + * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. + * For example, they could both be snapshots of the same filesystem, and + * 'earlier' is before 'later'. Or 'earlier' could be the origin of + * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's + * filesystem. Or 'earlier' could be the origin's origin. 
+ */ +boolean_t +dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier) +{ + dsl_pool_t *dp = later->ds_dir->dd_pool; + int error; + boolean_t ret; + dsl_dataset_t *origin; + + ASSERT(dsl_pool_config_held(dp)); + + if (earlier->ds_phys->ds_creation_txg >= + later->ds_phys->ds_creation_txg) + return (B_FALSE); + + if (later->ds_dir == earlier->ds_dir) + return (B_TRUE); + if (!dsl_dir_is_clone(later->ds_dir)) + return (B_FALSE); + + if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) + return (B_TRUE); + error = dsl_dataset_hold_obj(dp, + later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); + if (error != 0) + return (B_FALSE); + ret = dsl_dataset_is_before(origin, earlier); + dsl_dataset_rele(origin, FTAG); + return (ret); +} + #if defined(_KERNEL) && defined(HAVE_SPL) -EXPORT_SYMBOL(dmu_snapshots_destroy_nvl); EXPORT_SYMBOL(dsl_dataset_hold); EXPORT_SYMBOL(dsl_dataset_hold_obj); EXPORT_SYMBOL(dsl_dataset_own); @@ -4316,22 +2917,14 @@ EXPORT_SYMBOL(dsl_dataset_own_obj); EXPORT_SYMBOL(dsl_dataset_name); EXPORT_SYMBOL(dsl_dataset_rele); EXPORT_SYMBOL(dsl_dataset_disown); -EXPORT_SYMBOL(dsl_dataset_drop_ref); EXPORT_SYMBOL(dsl_dataset_tryown); -EXPORT_SYMBOL(dsl_dataset_make_exclusive); EXPORT_SYMBOL(dsl_dataset_create_sync); EXPORT_SYMBOL(dsl_dataset_create_sync_dd); -EXPORT_SYMBOL(dsl_dataset_destroy); -EXPORT_SYMBOL(dsl_dataset_destroy_check); -EXPORT_SYMBOL(dsl_dataset_destroy_sync); EXPORT_SYMBOL(dsl_dataset_snapshot_check); EXPORT_SYMBOL(dsl_dataset_snapshot_sync); -EXPORT_SYMBOL(dsl_dataset_rename); EXPORT_SYMBOL(dsl_dataset_promote); -EXPORT_SYMBOL(dsl_dataset_clone_swap); EXPORT_SYMBOL(dsl_dataset_user_hold); EXPORT_SYMBOL(dsl_dataset_user_release); -EXPORT_SYMBOL(dsl_dataset_user_release_tmp); EXPORT_SYMBOL(dsl_dataset_get_holds); EXPORT_SYMBOL(dsl_dataset_get_blkptr); EXPORT_SYMBOL(dsl_dataset_set_blkptr); @@ -4351,8 +2944,6 @@ EXPORT_SYMBOL(dsl_dataset_space); EXPORT_SYMBOL(dsl_dataset_fsid_guid); EXPORT_SYMBOL(dsl_dsobj_to_dsname); EXPORT_SYMBOL(dsl_dataset_check_quota); -EXPORT_SYMBOL(dsl_dataset_set_quota); -EXPORT_SYMBOL(dsl_dataset_set_quota_sync); -EXPORT_SYMBOL(dsl_dataset_set_reservation); -EXPORT_SYMBOL(dsl_destroy_inconsistent); +EXPORT_SYMBOL(dsl_dataset_clone_swap_check_impl); +EXPORT_SYMBOL(dsl_dataset_clone_swap_sync_impl); #endif diff --git a/module/zfs/dsl_deleg.c b/module/zfs/dsl_deleg.c index 48c261e63806..d09e79f1c70e 100644 --- a/module/zfs/dsl_deleg.c +++ b/module/zfs/dsl_deleg.c @@ -147,28 +147,37 @@ dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr) return (0); } +typedef struct dsl_deleg_arg { + const char *dda_name; + nvlist_t *dda_nvlist; +} dsl_deleg_arg_t; + static void -dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_deleg_set_sync(void *arg, dmu_tx_t *tx) { - dsl_dir_t *dd = arg1; - nvlist_t *nvp = arg2; - objset_t *mos = dd->dd_pool->dp_meta_objset; + dsl_deleg_arg_t *dda = arg; + dsl_dir_t *dd; + dsl_pool_t *dp = dmu_tx_pool(tx); + objset_t *mos = dp->dp_meta_objset; nvpair_t *whopair = NULL; - uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; + uint64_t zapobj; + + VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL)); + zapobj = dd->dd_phys->dd_deleg_zapobj; if (zapobj == 0) { dmu_buf_will_dirty(dd->dd_dbuf, tx); zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); } - while ((whopair = nvlist_next_nvpair(nvp, whopair))) { + while ((whopair = nvlist_next_nvpair(dda->dda_nvlist, whopair))) { const char *whokey = nvpair_name(whopair); nvlist_t *perms; 
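		/*
		 * The nvlist walked by this loop is two levels deep:
		 * { whokey -> { permission name -> boolean } }, as assembled by
		 * the "zfs allow" ioctl path before it reaches dsl_deleg_set().
		 * A minimal sketch of building such a list with the fnvlist
		 * helpers; the "ul$1001" whokey (local user, uid 1001) and the
		 * dataset name are illustrative values, not taken from this
		 * patch:
		 *
		 *	nvlist_t *who = fnvlist_alloc();
		 *	nvlist_t *perms = fnvlist_alloc();
		 *	int error;
		 *
		 *	fnvlist_add_boolean(perms, "create");
		 *	fnvlist_add_boolean(perms, "snapshot");
		 *	fnvlist_add_nvlist(who, "ul$1001", perms);
		 *	error = dsl_deleg_set("tank/home", who, B_FALSE);
		 *	fnvlist_free(perms);
		 *	fnvlist_free(who);
		 */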
nvpair_t *permpair = NULL; uint64_t jumpobj; - VERIFY(nvpair_value_nvlist(whopair, &perms) == 0); + perms = fnvpair_value_nvlist(whopair); if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) { jumpobj = zap_create_link(mos, DMU_OT_DSL_PERMS, @@ -185,21 +194,27 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) "%s %s", whokey, perm); } } + dsl_dir_rele(dd, FTAG); } static void -dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_deleg_unset_sync(void *arg, dmu_tx_t *tx) { - dsl_dir_t *dd = arg1; - nvlist_t *nvp = arg2; - objset_t *mos = dd->dd_pool->dp_meta_objset; + dsl_deleg_arg_t *dda = arg; + dsl_dir_t *dd; + dsl_pool_t *dp = dmu_tx_pool(tx); + objset_t *mos = dp->dp_meta_objset; nvpair_t *whopair = NULL; - uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; + uint64_t zapobj; - if (zapobj == 0) + VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL)); + zapobj = dd->dd_phys->dd_deleg_zapobj; + if (zapobj == 0) { + dsl_dir_rele(dd, FTAG); return; + } - while ((whopair = nvlist_next_nvpair(nvp, whopair))) { + while ((whopair = nvlist_next_nvpair(dda->dda_nvlist, whopair))) { const char *whokey = nvpair_name(whopair); nvlist_t *perms; nvpair_t *permpair = NULL; @@ -234,35 +249,40 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) "%s %s", whokey, perm); } } + dsl_dir_rele(dd, FTAG); } -int -dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset) +static int +dsl_deleg_check(void *arg, dmu_tx_t *tx) { + dsl_deleg_arg_t *dda = arg; dsl_dir_t *dd; int error; - nvpair_t *whopair = NULL; - int blocks_modified = 0; - error = dsl_dir_open(ddname, FTAG, &dd, NULL); - if (error) - return (error); - - if (spa_version(dmu_objset_spa(dd->dd_pool->dp_meta_objset)) < + if (spa_version(dmu_tx_pool(tx)->dp_spa) < SPA_VERSION_DELEGATED_PERMS) { - dsl_dir_close(dd, FTAG); return (ENOTSUP); } - while ((whopair = nvlist_next_nvpair(nvp, whopair))) - blocks_modified++; + error = dsl_dir_hold(dmu_tx_pool(tx), dda->dda_name, FTAG, &dd, NULL); + if (error == 0) + dsl_dir_rele(dd, FTAG); + return (error); +} - error = dsl_sync_task_do(dd->dd_pool, NULL, - unset ? dsl_deleg_unset_sync : dsl_deleg_set_sync, - dd, nvp, blocks_modified); - dsl_dir_close(dd, FTAG); +int +dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset) +{ + dsl_deleg_arg_t dda; - return (error); + /* nvp must already have been verified to be valid */ + + dda.dda_name = ddname; + dda.dda_nvlist = nvp; + + return (dsl_sync_task(ddname, dsl_deleg_check, + unset ? 
dsl_deleg_unset_sync : dsl_deleg_set_sync, + &dda, fnvlist_num_pairs(nvp))); } /* @@ -293,9 +313,15 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp) zap_attribute_t *baseza, *za; char *source; - error = dsl_dir_open(ddname, FTAG, &startdd, NULL); - if (error) + error = dsl_pool_hold(ddname, FTAG, &dp); + if (error != 0) + return (error); + + error = dsl_dir_hold(dp, ddname, FTAG, &startdd, NULL); + if (error != 0) { + dsl_pool_rele(dp, FTAG); return (error); + } dp = startdd->dd_pool; mos = dp->dp_meta_objset; @@ -307,20 +333,16 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp) source = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, KM_SLEEP); VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); - rw_enter(&dp->dp_config_rwlock, RW_READER); for (dd = startdd; dd != NULL; dd = dd->dd_parent) { nvlist_t *sp_nvp; uint64_t n; - if (dd->dd_phys->dd_deleg_zapobj && - (zap_count(mos, dd->dd_phys->dd_deleg_zapobj, - &n) == 0) && n) { - VERIFY(nvlist_alloc(&sp_nvp, - NV_UNIQUE_NAME, KM_SLEEP) == 0); - } else { + if (dd->dd_phys->dd_deleg_zapobj == 0 || + zap_count(mos, dd->dd_phys->dd_deleg_zapobj, &n) != 0 || + n == 0) continue; - } + sp_nvp = fnvlist_alloc(); for (zap_cursor_init(basezc, mos, dd->dd_phys->dd_deleg_zapobj); zap_cursor_retrieve(basezc, baseza) == 0; @@ -330,27 +352,23 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp) ASSERT(baseza->za_integer_length == 8); ASSERT(baseza->za_num_integers == 1); - VERIFY(nvlist_alloc(&perms_nvp, - NV_UNIQUE_NAME, KM_SLEEP) == 0); + perms_nvp = fnvlist_alloc(); for (zap_cursor_init(zc, mos, baseza->za_first_integer); zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { - VERIFY(nvlist_add_boolean(perms_nvp, - za->za_name) == 0); + fnvlist_add_boolean(perms_nvp, za->za_name); } zap_cursor_fini(zc); - VERIFY(nvlist_add_nvlist(sp_nvp, baseza->za_name, - perms_nvp) == 0); - nvlist_free(perms_nvp); + fnvlist_add_nvlist(sp_nvp, baseza->za_name, perms_nvp); + fnvlist_free(perms_nvp); } zap_cursor_fini(basezc); dsl_dir_name(dd, source); - VERIFY(nvlist_add_nvlist(*nvp, source, sp_nvp) == 0); + fnvlist_add_nvlist(*nvp, source, sp_nvp); nvlist_free(sp_nvp); } - rw_exit(&dp->dp_config_rwlock); kmem_free(source, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); kmem_free(baseza, sizeof(zap_attribute_t)); @@ -358,7 +376,8 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp) kmem_free(za, sizeof(zap_attribute_t)); kmem_free(zc, sizeof(zap_cursor_t)); - dsl_dir_close(startdd, FTAG); + dsl_dir_rele(startdd, FTAG); + dsl_pool_rele(dp, FTAG); return (0); } @@ -564,7 +583,7 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr) avl_create(&permsets, perm_set_compare, sizeof (perm_set_t), offsetof(perm_set_t, p_node)); - rw_enter(&dp->dp_config_rwlock, RW_READER); + ASSERT(dsl_pool_config_held(dp)); for (dd = ds->ds_dir; dd != NULL; dd = dd->dd_parent, checkflag = ZFS_DELEG_DESCENDENT) { uint64_t zapobj; @@ -625,7 +644,6 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr) } error = EPERM; success: - rw_exit(&dp->dp_config_rwlock); cookie = NULL; while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL) @@ -637,15 +655,19 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr) int dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr) { + dsl_pool_t *dp; dsl_dataset_t *ds; int error; - error = dsl_dataset_hold(dsname, FTAG, &ds); - if (error) + error = dsl_pool_hold(dsname, FTAG, &dp); + if (error != 0) return (error); - - error = dsl_deleg_access_impl(ds, perm, cr); - 
dsl_dataset_rele(ds, FTAG); + error = dsl_dataset_hold(dp, dsname, FTAG, &ds); + if (error == 0) { + error = dsl_deleg_access_impl(ds, perm, cr); + dsl_dataset_rele(ds, FTAG); + } + dsl_pool_rele(dp, FTAG); return (error); } diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c new file mode 100644 index 000000000000..1fb3859ac242 --- /dev/null +++ b/module/zfs/dsl_destroy.c @@ -0,0 +1,940 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct dmu_snapshots_destroy_arg { + nvlist_t *dsda_snaps; + nvlist_t *dsda_successful_snaps; + boolean_t dsda_defer; + nvlist_t *dsda_errlist; +} dmu_snapshots_destroy_arg_t; + +/* + * ds must be owned. + */ +static int +dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer) +{ + if (!dsl_dataset_is_snapshot(ds)) + return (EINVAL); + + if (dsl_dataset_long_held(ds)) + return (EBUSY); + + /* + * Only allow deferred destroy on pools that support it. + * NOTE: deferred destroy is only supported on snapshots. + */ + if (defer) { + if (spa_version(ds->ds_dir->dd_pool->dp_spa) < + SPA_VERSION_USERREFS) + return (ENOTSUP); + return (0); + } + + /* + * If this snapshot has an elevated user reference count, + * we can't destroy it yet. + */ + if (ds->ds_userrefs > 0) + return (EBUSY); + + /* + * Can't delete a branch point. + */ + if (ds->ds_phys->ds_num_children > 1) + return (EEXIST); + + return (0); +} + +static int +dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx) +{ + dmu_snapshots_destroy_arg_t *dsda = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + int error = 0; + + if (!dmu_tx_is_syncing(tx)) + return (0); + + for (pair = nvlist_next_nvpair(dsda->dsda_snaps, NULL); + pair != NULL; pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) { + dsl_dataset_t *ds; + + error = dsl_dataset_hold(dp, nvpair_name(pair), + FTAG, &ds); + + /* + * If the snapshot does not exist, silently ignore it + * (it's "already destroyed"). 
+ */ + if (error == ENOENT) + continue; + + if (error == 0) { + error = dsl_destroy_snapshot_check_impl(ds, + dsda->dsda_defer); + dsl_dataset_rele(ds, FTAG); + } + + if (error == 0) { + fnvlist_add_boolean(dsda->dsda_successful_snaps, + nvpair_name(pair)); + } else { + fnvlist_add_int32(dsda->dsda_errlist, + nvpair_name(pair), error); + } + } + + pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL); + if (pair != NULL) + return (fnvpair_value_int32(pair)); + return (0); +} + +struct process_old_arg { + dsl_dataset_t *ds; + dsl_dataset_t *ds_prev; + boolean_t after_branch_point; + zio_t *pio; + uint64_t used, comp, uncomp; +}; + +static int +process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + struct process_old_arg *poa = arg; + dsl_pool_t *dp = poa->ds->ds_dir->dd_pool; + + if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) { + dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx); + if (poa->ds_prev && !poa->after_branch_point && + bp->blk_birth > + poa->ds_prev->ds_phys->ds_prev_snap_txg) { + poa->ds_prev->ds_phys->ds_unique_bytes += + bp_get_dsize_sync(dp->dp_spa, bp); + } + } else { + poa->used += bp_get_dsize_sync(dp->dp_spa, bp); + poa->comp += BP_GET_PSIZE(bp); + poa->uncomp += BP_GET_UCSIZE(bp); + dsl_free_sync(poa->pio, dp, tx->tx_txg, bp); + } + return (0); +} + +static void +process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, + dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx) +{ + struct process_old_arg poa = { 0 }; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + uint64_t deadlist_obj; + + ASSERT(ds->ds_deadlist.dl_oldfmt); + ASSERT(ds_next->ds_deadlist.dl_oldfmt); + + poa.ds = ds; + poa.ds_prev = ds_prev; + poa.after_branch_point = after_branch_point; + poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); + VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, + process_old_cb, &poa, tx)); + VERIFY0(zio_wait(poa.pio)); + ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes); + + /* change snapused */ + dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, + -poa.used, -poa.comp, -poa.uncomp, tx); + + /* swap next's deadlist to our deadlist */ + dsl_deadlist_close(&ds->ds_deadlist); + dsl_deadlist_close(&ds_next->ds_deadlist); + deadlist_obj = ds->ds_phys->ds_deadlist_obj; + ds->ds_phys->ds_deadlist_obj = ds_next->ds_phys->ds_deadlist_obj; + ds_next->ds_phys->ds_deadlist_obj = deadlist_obj; + dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); + dsl_deadlist_open(&ds_next->ds_deadlist, mos, + ds_next->ds_phys->ds_deadlist_obj); +} + +static void +dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx) +{ + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + zap_cursor_t zc; + zap_attribute_t za; + + /* + * If it is the old version, dd_clones doesn't exist so we can't + * find the clones, but dsl_deadlist_remove_key() is a no-op so it + * doesn't matter. 
+ */ + if (ds->ds_dir->dd_phys->dd_clones == 0) + return; + + for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + dsl_dataset_t *clone; + + VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, + za.za_first_integer, FTAG, &clone)); + if (clone->ds_dir->dd_origin_txg > mintxg) { + dsl_deadlist_remove_key(&clone->ds_deadlist, + mintxg, tx); + dsl_dataset_remove_clones_key(clone, mintxg, tx); + } + dsl_dataset_rele(clone, FTAG); + } + zap_cursor_fini(&zc); +} + +void +dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) +{ +#ifdef ZFS_DEBUG + int err; +#endif + int after_branch_point = FALSE; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + dsl_dataset_t *ds_prev = NULL; + uint64_t obj, old_unique, used = 0, comp = 0, uncomp = 0; + dsl_dataset_t *ds_next, *ds_head, *hds; + + + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); + ASSERT(refcount_is_zero(&ds->ds_longholds)); + + if (defer && + (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)) { + ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; + spa_history_log_internal_ds(ds, "defer_destroy", tx, ""); + return; + } + + ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); + + /* We need to log before removing it from the namespace. */ + spa_history_log_internal_ds(ds, "destroy", tx, ""); + + dsl_scan_ds_destroyed(ds, tx); + + obj = ds->ds_object; + + if (ds->ds_phys->ds_prev_snap_obj != 0) { + ASSERT3P(ds->ds_prev, ==, NULL); + VERIFY0(dsl_dataset_hold_obj(dp, + ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev)); + after_branch_point = + (ds_prev->ds_phys->ds_next_snap_obj != obj); + + dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); + if (after_branch_point && + ds_prev->ds_phys->ds_next_clones_obj != 0) { + dsl_dataset_remove_from_next_clones(ds_prev, obj, tx); + if (ds->ds_phys->ds_next_snap_obj != 0) { + VERIFY0(zap_add_int(mos, + ds_prev->ds_phys->ds_next_clones_obj, + ds->ds_phys->ds_next_snap_obj, tx)); + } + } + if (!after_branch_point) { + ds_prev->ds_phys->ds_next_snap_obj = + ds->ds_phys->ds_next_snap_obj; + } + } + + VERIFY0(dsl_dataset_hold_obj(dp, + ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next)); + ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); + + old_unique = ds_next->ds_phys->ds_unique_bytes; + + dmu_buf_will_dirty(ds_next->ds_dbuf, tx); + ds_next->ds_phys->ds_prev_snap_obj = + ds->ds_phys->ds_prev_snap_obj; + ds_next->ds_phys->ds_prev_snap_txg = + ds->ds_phys->ds_prev_snap_txg; + ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, + ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); + + if (ds_next->ds_deadlist.dl_oldfmt) { + process_old_deadlist(ds, ds_prev, ds_next, + after_branch_point, tx); + } else { + /* Adjust prev's unique space. */ + if (ds_prev && !after_branch_point) { + dsl_deadlist_space_range(&ds_next->ds_deadlist, + ds_prev->ds_phys->ds_prev_snap_txg, + ds->ds_phys->ds_prev_snap_txg, + &used, &comp, &uncomp); + ds_prev->ds_phys->ds_unique_bytes += used; + } + + /* Adjust snapused. */ + dsl_deadlist_space_range(&ds_next->ds_deadlist, + ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, + &used, &comp, &uncomp); + dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, + -used, -comp, -uncomp, tx); + + /* Move blocks to be freed to pool's free list. 
*/ + dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, + &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg, + tx); + dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, + DD_USED_HEAD, used, comp, uncomp, tx); + + /* Merge our deadlist into next's and free it. */ + dsl_deadlist_merge(&ds_next->ds_deadlist, + ds->ds_phys->ds_deadlist_obj, tx); + } + dsl_deadlist_close(&ds->ds_deadlist); + dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_deadlist_obj = 0; + + /* Collapse range in clone heads */ + dsl_dataset_remove_clones_key(ds, + ds->ds_phys->ds_creation_txg, tx); + + if (dsl_dataset_is_snapshot(ds_next)) { + dsl_dataset_t *ds_nextnext; + + /* + * Update next's unique to include blocks which + * were previously shared by only this snapshot + * and it. Those blocks will be born after the + * prev snap and before this snap, and will have + * died after the next snap and before the one + * after that (ie. be on the snap after next's + * deadlist). + */ + VERIFY0(dsl_dataset_hold_obj(dp, + ds_next->ds_phys->ds_next_snap_obj, FTAG, &ds_nextnext)); + dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, + ds->ds_phys->ds_prev_snap_txg, + ds->ds_phys->ds_creation_txg, + &used, &comp, &uncomp); + ds_next->ds_phys->ds_unique_bytes += used; + dsl_dataset_rele(ds_nextnext, FTAG); + ASSERT3P(ds_next->ds_prev, ==, NULL); + + /* Collapse range in this head. */ + VERIFY0(dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &hds)); + dsl_deadlist_remove_key(&hds->ds_deadlist, + ds->ds_phys->ds_creation_txg, tx); + dsl_dataset_rele(hds, FTAG); + + } else { + ASSERT3P(ds_next->ds_prev, ==, ds); + dsl_dataset_rele(ds_next->ds_prev, ds_next); + ds_next->ds_prev = NULL; + if (ds_prev) { + VERIFY0(dsl_dataset_hold_obj(dp, + ds->ds_phys->ds_prev_snap_obj, + ds_next, &ds_next->ds_prev)); + } + + dsl_dataset_recalc_head_uniq(ds_next); + + /* + * Reduce the amount of our unconsumed refreservation + * being charged to our parent by the amount of + * new unique data we have gained. + */ + if (old_unique < ds_next->ds_reserved) { + int64_t mrsdelta; + uint64_t new_unique = + ds_next->ds_phys->ds_unique_bytes; + + ASSERT(old_unique <= new_unique); + mrsdelta = MIN(new_unique - old_unique, + ds_next->ds_reserved - old_unique); + dsl_dir_diduse_space(ds->ds_dir, + DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); + } + } + dsl_dataset_rele(ds_next, FTAG); + + /* + * This must be done after the dsl_traverse(), because it will + * re-open the objset. 
+ */ + if (ds->ds_objset) { + dmu_objset_evict(ds->ds_objset); + ds->ds_objset = NULL; + } + + /* remove from snapshot namespace */ + ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0); + VERIFY0(dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head)); + VERIFY0(dsl_dataset_get_snapname(ds)); +#ifdef ZFS_DEBUG + { + uint64_t val; + + err = dsl_dataset_snap_lookup(ds_head, + ds->ds_snapname, &val); + ASSERT0(err); + ASSERT3U(val, ==, obj); + } +#endif + VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx)); + dsl_dataset_rele(ds_head, FTAG); + + if (ds_prev != NULL) + dsl_dataset_rele(ds_prev, FTAG); + + spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); + + if (ds->ds_phys->ds_next_clones_obj != 0) { + ASSERTV(uint64_t count); + ASSERT0(zap_count(mos, + ds->ds_phys->ds_next_clones_obj, &count) && count == 0); + VERIFY0(dmu_object_free(mos, + ds->ds_phys->ds_next_clones_obj, tx)); + } + if (ds->ds_phys->ds_props_obj != 0) + VERIFY0(zap_destroy(mos, ds->ds_phys->ds_props_obj, tx)); + if (ds->ds_phys->ds_userrefs_obj != 0) + VERIFY0(zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx)); + dsl_dir_rele(ds->ds_dir, ds); + ds->ds_dir = NULL; + VERIFY0(dmu_object_free(mos, obj, tx)); +} + +static void +dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx) +{ + dmu_snapshots_destroy_arg_t *dsda = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + + for (pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, NULL); + pair != NULL; + pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, pair)) { + dsl_dataset_t *ds; + + VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds)); + + dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx); + dsl_dataset_rele(ds, FTAG); + } +} + +/* + * The semantics of this function are described in the comment above + * lzc_destroy_snaps(). To summarize: + * + * The snapshots must all be in the same pool. + * + * Snapshots that don't exist will be silently ignored (considered to be + * "already deleted"). + * + * On success, all snaps will be destroyed and this will return 0. + * On failure, no snaps will be destroyed, the errlist will be filled in, + * and this will return an errno. 
+ */ +int +dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer, + nvlist_t *errlist) +{ + dmu_snapshots_destroy_arg_t dsda; + int error; + nvpair_t *pair; + + pair = nvlist_next_nvpair(snaps, NULL); + if (pair == NULL) + return (0); + + dsda.dsda_snaps = snaps; + VERIFY0(nvlist_alloc(&dsda.dsda_successful_snaps, NV_UNIQUE_NAME, KM_PUSHPAGE)); + dsda.dsda_defer = defer; + dsda.dsda_errlist = errlist; + + error = dsl_sync_task(nvpair_name(pair), + dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync, + &dsda, 0); + fnvlist_free(dsda.dsda_successful_snaps); + + return (error); +} + +int +dsl_destroy_snapshot(const char *name, boolean_t defer) +{ + int error; + nvlist_t *nvl; + nvlist_t *errlist; + + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); + VERIFY0(nvlist_alloc(&errlist, NV_UNIQUE_NAME, KM_PUSHPAGE)); + + fnvlist_add_boolean(nvl, name); + error = dsl_destroy_snapshots_nvl(nvl, defer, errlist); + fnvlist_free(errlist); + fnvlist_free(nvl); + return (error); +} + +struct killarg { + dsl_dataset_t *ds; + dmu_tx_t *tx; +}; + +/* ARGSUSED */ +static int +kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, + const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) +{ + struct killarg *ka = arg; + dmu_tx_t *tx = ka->tx; + + if (bp == NULL) + return (0); + + if (zb->zb_level == ZB_ZIL_LEVEL) { + ASSERT(zilog != NULL); + /* + * It's a block in the intent log. It has no + * accounting, so just free it. + */ + dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); + } else { + ASSERT(zilog == NULL); + ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg); + (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); + } + + return (0); +} + +static void +old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) +{ + struct killarg ka; + + /* + * Free everything that we point to (that's born after + * the previous snapshot, if we are a clone) + * + * NB: this should be very quick, because we already + * freed all the objects in open context. + */ + ka.ds = ds; + ka.tx = tx; + VERIFY0(traverse_dataset(ds, + ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST, + kill_blkptr, &ka)); + ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0); +} + +typedef struct dsl_destroy_head_arg { + const char *ddha_name; +} dsl_destroy_head_arg_t; + +int +dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds) +{ + int error; + uint64_t count; + objset_t *mos; + + if (dsl_dataset_is_snapshot(ds)) + return (EINVAL); + + if (refcount_count(&ds->ds_longholds) != expected_holds) + return (EBUSY); + + mos = ds->ds_dir->dd_pool->dp_meta_objset; + + /* + * Can't delete a head dataset if there are snapshots of it. + * (Except if the only snapshots are from the branch we cloned + * from.) + */ + if (ds->ds_prev != NULL && + ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) + return (EBUSY); + + /* + * Can't delete if there are children of this fs. + */ + error = zap_count(mos, + ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); + if (error != 0) + return (error); + if (count != 0) + return (EEXIST); + + if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) && + ds->ds_prev->ds_phys->ds_num_children == 2 && + ds->ds_prev->ds_userrefs == 0) { + /* We need to remove the origin snapshot as well. 
*/ + if (!refcount_is_zero(&ds->ds_prev->ds_longholds)) + return (EBUSY); + } + return (0); +} + +static int +dsl_destroy_head_check(void *arg, dmu_tx_t *tx) +{ + dsl_destroy_head_arg_t *ddha = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + int error; + + error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds); + if (error != 0) + return (error); + + error = dsl_destroy_head_check_impl(ds, 0); + dsl_dataset_rele(ds, FTAG); + return (error); +} + +static void +dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx) +{ + dsl_dir_t *dd; + dsl_pool_t *dp = dmu_tx_pool(tx); + objset_t *mos = dp->dp_meta_objset; + dd_used_t t; + + ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock)); + + VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd)); + + ASSERT0(dd->dd_phys->dd_head_dataset_obj); + + /* + * Remove our reservation. The impl() routine avoids setting the + * actual property, which would require the (already destroyed) ds. + */ + dsl_dir_set_reservation_sync_impl(dd, 0, tx); + + ASSERT0(dd->dd_phys->dd_used_bytes); + ASSERT0(dd->dd_phys->dd_reserved); + for (t = 0; t < DD_USED_NUM; t++) + ASSERT0(dd->dd_phys->dd_used_breakdown[t]); + + VERIFY0(zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx)); + VERIFY0(zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx)); + VERIFY0(dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx)); + VERIFY0(zap_remove(mos, + dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx)); + + dsl_dir_rele(dd, FTAG); + VERIFY0(dmu_object_free(mos, ddobj, tx)); +} + +void +dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) +{ + dsl_pool_t *dp = dmu_tx_pool(tx); + objset_t *mos = dp->dp_meta_objset; + uint64_t obj, ddobj, prevobj = 0; + boolean_t rmorigin; + zfeature_info_t *async_destroy; + objset_t *os; + + ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); + ASSERT(ds->ds_prev == NULL || + ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); + ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + + /* We need to log before removing it from the namespace. */ + spa_history_log_internal_ds(ds, "destroy", tx, ""); + + rmorigin = (dsl_dir_is_clone(ds->ds_dir) && + DS_IS_DEFER_DESTROY(ds->ds_prev) && + ds->ds_prev->ds_phys->ds_num_children == 2 && + ds->ds_prev->ds_userrefs == 0); + + /* Remove our reservation */ + if (ds->ds_reserved != 0) { + dsl_dataset_set_refreservation_sync_impl(ds, + (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), + 0, tx); + ASSERT0(ds->ds_reserved); + } + + dsl_scan_ds_destroyed(ds, tx); + + obj = ds->ds_object; + + if (ds->ds_phys->ds_prev_snap_obj != 0) { + /* This is a clone */ + ASSERT(ds->ds_prev != NULL); + ASSERT3U(ds->ds_prev->ds_phys->ds_next_snap_obj, !=, obj); + ASSERT0(ds->ds_phys->ds_next_snap_obj); + + dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); + if (ds->ds_prev->ds_phys->ds_next_clones_obj != 0) { + dsl_dataset_remove_from_next_clones(ds->ds_prev, + obj, tx); + } + + ASSERT3U(ds->ds_prev->ds_phys->ds_num_children, >, 1); + ds->ds_prev->ds_phys->ds_num_children--; + } + + async_destroy = + &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]; + + /* + * Destroy the deadlist. Unless it's a clone, the + * deadlist should be empty. (If it's a clone, it's + * safe to ignore the deadlist contents.) 
+ */ + dsl_deadlist_close(&ds->ds_deadlist); + dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_deadlist_obj = 0; + + VERIFY0(dmu_objset_from_ds(ds, &os)); + + if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) { + old_synchronous_dataset_destroy(ds, tx); + } else { + /* + * Move the bptree into the pool's list of trees to + * clean up and update space accounting information. + */ + uint64_t used, comp, uncomp; + + zil_destroy_sync(dmu_objset_zil(os), tx); + + if (!spa_feature_is_active(dp->dp_spa, async_destroy)) { + spa_feature_incr(dp->dp_spa, async_destroy, tx); + dp->dp_bptree_obj = bptree_alloc(mos, tx); + VERIFY0(zap_add(mos, + DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, + &dp->dp_bptree_obj, tx)); + } + + used = ds->ds_dir->dd_phys->dd_used_bytes; + comp = ds->ds_dir->dd_phys->dd_compressed_bytes; + uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes; + + ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || + ds->ds_phys->ds_unique_bytes == used); + + bptree_add(mos, dp->dp_bptree_obj, + &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg, + used, comp, uncomp, tx); + dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, + -used, -comp, -uncomp, tx); + dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, + used, comp, uncomp, tx); + } + + if (ds->ds_prev != NULL) { + if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { + VERIFY0(zap_remove_int(mos, + ds->ds_prev->ds_dir->dd_phys->dd_clones, + ds->ds_object, tx)); + } + prevobj = ds->ds_prev->ds_object; + dsl_dataset_rele(ds->ds_prev, ds); + ds->ds_prev = NULL; + } + + /* + * This must be done after the dsl_traverse(), because it will + * re-open the objset. + */ + if (ds->ds_objset) { + dmu_objset_evict(ds->ds_objset); + ds->ds_objset = NULL; + } + + /* Erase the link in the dir */ + dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); + ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; + ddobj = ds->ds_dir->dd_object; + ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); + VERIFY0(zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx)); + + spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); + + ASSERT0(ds->ds_phys->ds_next_clones_obj); + ASSERT0(ds->ds_phys->ds_props_obj); + ASSERT0(ds->ds_phys->ds_userrefs_obj); + dsl_dir_rele(ds->ds_dir, ds); + ds->ds_dir = NULL; + VERIFY0(dmu_object_free(mos, obj, tx)); + + dsl_dir_destroy_sync(ddobj, tx); + + if (rmorigin) { + dsl_dataset_t *prev; + VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev)); + dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx); + dsl_dataset_rele(prev, FTAG); + } +} + +static void +dsl_destroy_head_sync(void *arg, dmu_tx_t *tx) +{ + dsl_destroy_head_arg_t *ddha = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + + VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds)); + dsl_destroy_head_sync_impl(ds, tx); + dsl_dataset_rele(ds, FTAG); +} + +static void +dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx) +{ + dsl_destroy_head_arg_t *ddha = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + + VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds)); + + /* Mark it as inconsistent on-disk, in case we crash */ + dmu_buf_will_dirty(ds->ds_dbuf, tx); + ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; + + spa_history_log_internal_ds(ds, "destroy begin", tx, ""); + dsl_dataset_rele(ds, FTAG); +} + +int +dsl_destroy_head(const char *name) +{ + dsl_destroy_head_arg_t ddha; + int error; + spa_t *spa; + boolean_t isenabled; + +#ifdef _KERNEL + zfs_destroy_unmount_origin(name); 
+#endif + + error = spa_open(name, &spa, FTAG); + if (error != 0) + return (error); + isenabled = spa_feature_is_enabled(spa, + &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]); + spa_close(spa, FTAG); + + ddha.ddha_name = name; + + if (!isenabled) { + objset_t *os; + + error = dsl_sync_task(name, dsl_destroy_head_check, + dsl_destroy_head_begin_sync, &ddha, 0); + if (error != 0) + return (error); + + /* + * Head deletion is processed in one txg on old pools; + * remove the objects from open context so that the txg sync + * is not too long. + */ + error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os); + if (error == 0) { + uint64_t obj; + uint64_t prev_snap_txg = + dmu_objset_ds(os)->ds_phys->ds_prev_snap_txg; + for (obj = 0; error == 0; + error = dmu_object_next(os, &obj, FALSE, + prev_snap_txg)) + (void) dmu_free_object(os, obj); + /* sync out all frees */ + txg_wait_synced(dmu_objset_pool(os), 0); + dmu_objset_disown(os, FTAG); + } + } + + return (dsl_sync_task(name, dsl_destroy_head_check, + dsl_destroy_head_sync, &ddha, 0)); +} + +/* + * Note, this function is used as the callback for dmu_objset_find(). We + * always return 0 so that we will continue to find and process + * inconsistent datasets, even if we encounter an error trying to + * process one of them. + */ +/* ARGSUSED */ +int +dsl_destroy_inconsistent(const char *dsname, void *arg) +{ + objset_t *os; + + if (dmu_objset_hold(dsname, FTAG, &os) == 0) { + boolean_t inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os)); + dmu_objset_rele(os, FTAG); + if (inconsistent) + (void) dsl_destroy_head(dsname); + } + return (0); +} + + +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(dsl_destroy_head); +EXPORT_SYMBOL(dsl_destroy_head_sync_impl); +EXPORT_SYMBOL(dsl_dataset_user_hold_check_one); +EXPORT_SYMBOL(dsl_destroy_snapshot_sync_impl); +EXPORT_SYMBOL(dsl_destroy_inconsistent); +EXPORT_SYMBOL(dsl_dataset_user_release_tmp); +EXPORT_SYMBOL(dsl_destroy_head_check_impl); +#endif diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 45c73c363e57..ccae3f2709f4 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -40,8 +40,6 @@ #include "zfs_namecheck.h" static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); -static void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, - uint64_t value, dmu_tx_t *tx); /* ARGSUSED */ static void @@ -58,7 +56,7 @@ dsl_dir_evict(dmu_buf_t *db, void *arg) } if (dd->dd_parent) - dsl_dir_close(dd->dd_parent, dd); + dsl_dir_rele(dd->dd_parent, dd); spa_close(dd->dd_pool->dp_spa, dd); @@ -72,18 +70,17 @@ dsl_dir_evict(dmu_buf_t *db, void *arg) } int -dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, +dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, const char *tail, void *tag, dsl_dir_t **ddp) { dmu_buf_t *dbuf; dsl_dir_t *dd; int err; - ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || - dsl_pool_sync_context(dp)); + ASSERT(dsl_pool_config_held(dp)); err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf); - if (err) + if (err != 0) return (err); dd = dmu_buf_get_user(dbuf); #ifdef ZFS_DEBUG @@ -110,9 +107,9 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, dsl_dir_snap_cmtime_update(dd); if (dd->dd_phys->dd_parent_obj) { - err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj, + err = dsl_dir_hold_obj(dp, dd->dd_phys->dd_parent_obj, NULL, dd, &dd->dd_parent); - if (err) + if (err != 0) goto errout; if (tail) { #ifdef ZFS_DEBUG @@ -129,7 +126,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, dd->dd_parent->dd_phys->dd_child_dir_zapobj, ddobj, 0, dd->dd_myname); } - if (err) + 
if (err != 0) goto errout; } else { (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa)); @@ -146,7 +143,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, */ err = dmu_bonus_hold(dp->dp_meta_objset, dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus); - if (err) + if (err != 0) goto errout; origin_phys = origin_bonus->db_data; dd->dd_origin_txg = @@ -158,7 +155,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, dsl_dir_evict); if (winner) { if (dd->dd_parent) - dsl_dir_close(dd->dd_parent, dd); + dsl_dir_rele(dd->dd_parent, dd); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); dd = winner; @@ -185,7 +182,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, errout: if (dd->dd_parent) - dsl_dir_close(dd->dd_parent, dd); + dsl_dir_rele(dd->dd_parent, dd); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); dmu_buf_rele(dbuf, tag); @@ -193,7 +190,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, } void -dsl_dir_close(dsl_dir_t *dd, void *tag) +dsl_dir_rele(dsl_dir_t *dd, void *tag) { dprintf_dd(dd, "%s\n", ""); spa_close(dd->dd_pool->dp_spa, tag); @@ -250,6 +247,7 @@ static int getcomponent(const char *path, char *component, const char **nextp) { char *p; + if ((path == NULL) || (path[0] == '\0')) return (ENOENT); /* This would be a good place to reserve some namespace... */ @@ -272,10 +270,10 @@ getcomponent(const char *path, char *component, const char **nextp) (void) strcpy(component, path); p = NULL; } else if (p[0] == '/') { - if (p-path >= MAXNAMELEN) + if (p - path >= MAXNAMELEN) return (ENAMETOOLONG); (void) strncpy(component, path, p - path); - component[p-path] = '\0'; + component[p - path] = '\0'; p++; } else if (p[0] == '@') { /* @@ -284,66 +282,57 @@ getcomponent(const char *path, char *component, const char **nextp) */ if (strchr(path, '/')) return (EINVAL); - if (p-path >= MAXNAMELEN) + if (p - path >= MAXNAMELEN) return (ENAMETOOLONG); (void) strncpy(component, path, p - path); - component[p-path] = '\0'; + component[p - path] = '\0'; } else { - ASSERT(!"invalid p"); + panic("invalid p=%p", (void *)p); } *nextp = p; return (0); } /* - * same as dsl_dir_open, ignore the first component of name and use the - * spa instead + * Return the dsl_dir_t, and possibly the last component which couldn't + * be found in *tail. The name must be in the specified dsl_pool_t. This + * thread must hold the dp_config_rwlock for the pool. Returns NULL if the + * path is bogus, or if tail==NULL and we couldn't parse the whole name. + * (*tail)[0] == '@' means that the last component is a snapshot. */ int -dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, +dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag, dsl_dir_t **ddp, const char **tailp) { char *buf; - const char *next, *nextnext = NULL; + const char *spaname, *next, *nextnext = NULL; int err; dsl_dir_t *dd; - dsl_pool_t *dp; uint64_t ddobj; - int openedspa = FALSE; - - dprintf("%s\n", name); buf = kmem_alloc(MAXNAMELEN, KM_PUSHPAGE); err = getcomponent(name, buf, &next); - if (err) + if (err != 0) goto error; - if (spa == NULL) { - err = spa_open(buf, &spa, FTAG); - if (err) { - dprintf("spa_open(%s) failed\n", buf); - goto error; - } - openedspa = TRUE; - /* XXX this assertion belongs in spa_open */ - ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa))); + /* Make sure the name is in the specified pool. 
*/ + spaname = spa_name(dp->dp_spa); + if (strcmp(buf, spaname) != 0) { + err = EINVAL; + goto error; } - dp = spa_get_dsl(spa); + ASSERT(dsl_pool_config_held(dp)); - rw_enter(&dp->dp_config_rwlock, RW_READER); - err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); - if (err) { - rw_exit(&dp->dp_config_rwlock); - if (openedspa) - spa_close(spa, FTAG); + err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); + if (err != 0) { goto error; } while (next != NULL) { dsl_dir_t *child_ds; err = getcomponent(next, buf, &nextnext); - if (err) + if (err != 0) break; ASSERT(next[0] != '\0'); if (next[0] == '@') @@ -354,25 +343,22 @@ dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, err = zap_lookup(dp->dp_meta_objset, dd->dd_phys->dd_child_dir_zapobj, buf, sizeof (ddobj), 1, &ddobj); - if (err) { + if (err != 0) { if (err == ENOENT) err = 0; break; } - err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds); - if (err) + err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_ds); + if (err != 0) break; - dsl_dir_close(dd, tag); + dsl_dir_rele(dd, tag); dd = child_ds; next = nextnext; } - rw_exit(&dp->dp_config_rwlock); - if (err) { - dsl_dir_close(dd, tag); - if (openedspa) - spa_close(spa, FTAG); + if (err != 0) { + dsl_dir_rele(dd, tag); goto error; } @@ -383,32 +369,18 @@ dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, if (next != NULL && (tailp == NULL || (nextnext && nextnext[0] != '\0'))) { /* bad path name */ - dsl_dir_close(dd, tag); + dsl_dir_rele(dd, tag); dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); err = ENOENT; } - if (tailp) + if (tailp != NULL) *tailp = next; - if (openedspa) - spa_close(spa, FTAG); *ddp = dd; error: kmem_free(buf, MAXNAMELEN); return (err); } -/* - * Return the dsl_dir_t, and possibly the last component which couldn't - * be found in *tail. Return NULL if the path is bogus, or if - * tail==NULL and we couldn't parse the whole name. (*tail)[0] == '@' - * means that the last component is a snapshot. - */ -int -dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp) -{ - return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp)); -} - uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, dmu_tx_t *tx) @@ -446,71 +418,6 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, return (ddobj); } -/* ARGSUSED */ -int -dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) -{ - dsl_dir_t *dd = arg1; - dsl_pool_t *dp = dd->dd_pool; - objset_t *mos = dp->dp_meta_objset; - int err; - uint64_t count; - - /* - * There should be exactly two holds, both from - * dsl_dataset_destroy: one on the dd directory, and one on its - * head ds. If there are more holds, then a concurrent thread is - * performing a lookup inside this dir while we're trying to destroy - * it. To minimize this possibility, we perform this check only - * in syncing context and fail the operation if we encounter - * additional holds. The dp_config_rwlock ensures that nobody else - * opens it after we check. 
- */ - if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 2) - return (EBUSY); - - err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count); - if (err) - return (err); - if (count != 0) - return (EEXIST); - - return (0); -} - -void -dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) -{ - dsl_dir_t *dd = arg1; - objset_t *mos = dd->dd_pool->dp_meta_objset; - uint64_t obj; - dd_used_t t; - - ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)); - ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); - - /* - * Remove our reservation. The impl() routine avoids setting the - * actual property, which would require the (already destroyed) ds. - */ - dsl_dir_set_reservation_sync_impl(dd, 0, tx); - - ASSERT0(dd->dd_phys->dd_used_bytes); - ASSERT0(dd->dd_phys->dd_reserved); - for (t = 0; t < DD_USED_NUM; t++) - ASSERT0(dd->dd_phys->dd_used_breakdown[t]); - - VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx)); - VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx)); - VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx)); - VERIFY(0 == zap_remove(mos, - dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx)); - - obj = dd->dd_object; - dsl_dir_close(dd, tag); - VERIFY(0 == dmu_object_free(mos, obj, tx)); -} - boolean_t dsl_dir_is_clone(dsl_dir_t *dd) { @@ -546,18 +453,16 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) } mutex_exit(&dd->dd_lock); - rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); if (dsl_dir_is_clone(dd)) { dsl_dataset_t *ds; char buf[MAXNAMELEN]; - VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, + VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, dd->dd_phys->dd_origin_obj, FTAG, &ds)); dsl_dataset_name(ds, buf); dsl_dataset_rele(ds, FTAG); dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf); } - rw_exit(&dd->dd_pool->dp_config_rwlock); } void @@ -567,7 +472,7 @@ dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) ASSERT(dd->dd_phys); - if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) { + if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) { /* up the hold count until we can be written out */ dmu_buf_add_ref(dd->dd_dbuf, dd); } @@ -854,7 +759,7 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize, FALSE, asize > usize, tr_list, tx, TRUE); } - if (err) + if (err != 0) dsl_dir_tempreserve_clear(tr_list, tx); else *tr_cookiep = tr_list; @@ -1007,118 +912,123 @@ dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta, mutex_exit(&dd->dd_lock); } +typedef struct dsl_dir_set_qr_arg { + const char *ddsqra_name; + zprop_source_t ddsqra_source; + uint64_t ddsqra_value; +} dsl_dir_set_qr_arg_t; + static int -dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_set_quota_check(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; - dsl_prop_setarg_t *psa = arg2; - int err; - uint64_t towrite; + dsl_dir_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + int error; + uint64_t towrite, newval; - if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) - return (err); + error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); + if (error != 0) + return (error); + + error = dsl_prop_predict(ds->ds_dir, "quota", + ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); + return (error); + } - if (psa->psa_effective_value == 0) + if (newval == 0) { + dsl_dataset_rele(ds, FTAG); return (0); + } - mutex_enter(&dd->dd_lock); + mutex_enter(&ds->ds_dir->dd_lock); /* * If we are doing the 
preliminary check in open context, and * there are pending changes, then don't fail it, since the * pending changes could under-estimate the amount of space to be * freed up. */ - towrite = dsl_dir_space_towrite(dd); + towrite = dsl_dir_space_towrite(ds->ds_dir); if ((dmu_tx_is_syncing(tx) || towrite == 0) && - (psa->psa_effective_value < dd->dd_phys->dd_reserved || - psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) { - err = ENOSPC; + (newval < ds->ds_dir->dd_phys->dd_reserved || + newval < ds->ds_dir->dd_phys->dd_used_bytes + towrite)) { + error = ENOSPC; } - mutex_exit(&dd->dd_lock); - return (err); + mutex_exit(&ds->ds_dir->dd_lock); + dsl_dataset_rele(ds, FTAG); + return (error); } -extern dsl_syncfunc_t dsl_prop_set_sync; - static void -dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; - dsl_prop_setarg_t *psa = arg2; - uint64_t effective_value = psa->psa_effective_value; + dsl_dir_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + uint64_t newval; - dsl_prop_set_sync(ds, psa, tx); - DSL_PROP_CHECK_PREDICTION(dd, psa); + VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); - dmu_buf_will_dirty(dd->dd_dbuf, tx); + dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA), + ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, + &ddsqra->ddsqra_value, tx); - mutex_enter(&dd->dd_lock); - dd->dd_phys->dd_quota = effective_value; - mutex_exit(&dd->dd_lock); + VERIFY0(dsl_prop_get_int_ds(ds, + zfs_prop_to_name(ZFS_PROP_QUOTA), &newval)); - spa_history_log_internal_dd(dd, "set quota", tx, - "quota=%lld", (longlong_t)effective_value); + dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); + mutex_enter(&ds->ds_dir->dd_lock); + ds->ds_dir->dd_phys->dd_quota = newval; + mutex_exit(&ds->ds_dir->dd_lock); + dsl_dataset_rele(ds, FTAG); } int dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota) { - dsl_dir_t *dd; - dsl_dataset_t *ds; - dsl_prop_setarg_t psa; - int err; - - dsl_prop_setarg_init_uint64(&psa, "quota", source, "a); - - err = dsl_dataset_hold(ddname, FTAG, &ds); - if (err) - return (err); - - err = dsl_dir_open(ddname, FTAG, &dd, NULL); - if (err) { - dsl_dataset_rele(ds, FTAG); - return (err); - } + dsl_dir_set_qr_arg_t ddsqra; - ASSERT(ds->ds_dir == dd); + ddsqra.ddsqra_name = ddname; + ddsqra.ddsqra_source = source; + ddsqra.ddsqra_value = quota; - /* - * If someone removes a file, then tries to set the quota, we want to - * make sure the file freeing takes effect. 
- */ - txg_wait_open(dd->dd_pool, 0); - - err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check, - dsl_dir_set_quota_sync, ds, &psa, 0); - - dsl_dir_close(dd, FTAG); - dsl_dataset_rele(ds, FTAG); - return (err); + return (dsl_sync_task(ddname, dsl_dir_set_quota_check, + dsl_dir_set_quota_sync, &ddsqra, 0)); } int -dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; - dsl_prop_setarg_t *psa = arg2; - uint64_t effective_value; - uint64_t used, avail; - int err; - - if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) - return (err); + dsl_dir_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + dsl_dir_t *dd; + uint64_t newval, used, avail; + int error; - effective_value = psa->psa_effective_value; + error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); + if (error != 0) + return (error); + dd = ds->ds_dir; /* * If we are doing the preliminary check in open context, the * space estimates may be inaccurate. */ - if (!dmu_tx_is_syncing(tx)) + if (!dmu_tx_is_syncing(tx)) { + dsl_dataset_rele(ds, FTAG); return (0); + } + + error = dsl_prop_predict(ds->ds_dir, + zfs_prop_to_name(ZFS_PROP_RESERVATION), + ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); + if (error != 0) { + dsl_dataset_rele(ds, FTAG); + return (error); + } mutex_enter(&dd->dd_lock); used = dd->dd_phys->dd_used_bytes; @@ -1131,21 +1041,21 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used; } - if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) { - uint64_t delta = MAX(used, effective_value) - + if (MAX(used, newval) > MAX(used, dd->dd_phys->dd_reserved)) { + uint64_t delta = MAX(used, newval) - MAX(used, dd->dd_phys->dd_reserved); - if (delta > avail) - return (ENOSPC); - if (dd->dd_phys->dd_quota > 0 && - effective_value > dd->dd_phys->dd_quota) - return (ENOSPC); + if (delta > avail || + (dd->dd_phys->dd_quota > 0 && + newval > dd->dd_phys->dd_quota)) + error = ENOSPC; } - return (0); + dsl_dataset_rele(ds, FTAG); + return (error); } -static void +void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx) { uint64_t used; @@ -1167,51 +1077,38 @@ dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx) } static void -dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_dir_t *dd = ds->ds_dir; - dsl_prop_setarg_t *psa = arg2; - uint64_t value = psa->psa_effective_value; + dsl_dir_set_qr_arg_t *ddsqra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + uint64_t newval; - dsl_prop_set_sync(ds, psa, tx); - DSL_PROP_CHECK_PREDICTION(dd, psa); - - dsl_dir_set_reservation_sync_impl(dd, value, tx); + VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); + + dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_RESERVATION), + ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, + &ddsqra->ddsqra_value, tx); - spa_history_log_internal_dd(dd, "set reservation", tx, - "reservation=%lld", (longlong_t)value); + VERIFY0(dsl_prop_get_int_ds(ds, + zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval)); + + dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx); + dsl_dataset_rele(ds, FTAG); } int dsl_dir_set_reservation(const char *ddname, zprop_source_t source, uint64_t reservation) { - 
dsl_dir_t *dd; - dsl_dataset_t *ds; - dsl_prop_setarg_t psa; - int err; - - dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation); - - err = dsl_dataset_hold(ddname, FTAG, &ds); - if (err) - return (err); - - err = dsl_dir_open(ddname, FTAG, &dd, NULL); - if (err) { - dsl_dataset_rele(ds, FTAG); - return (err); - } + dsl_dir_set_qr_arg_t ddsqra; - ASSERT(ds->ds_dir == dd); + ddsqra.ddsqra_name = ddname; + ddsqra.ddsqra_source = source; + ddsqra.ddsqra_value = reservation; - err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check, - dsl_dir_set_reservation_sync, ds, &psa, 0); - - dsl_dir_close(dd, FTAG); - dsl_dataset_rele(ds, FTAG); - return (err); + return (dsl_sync_task(ddname, dsl_dir_set_reservation_check, + dsl_dir_set_reservation_sync, &ddsqra, 0)); } static dsl_dir_t * @@ -1243,79 +1140,123 @@ would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) return (would_change(dd->dd_parent, delta, ancestor)); } -struct renamearg { - dsl_dir_t *newparent; - const char *mynewname; -}; +typedef struct dsl_dir_rename_arg { + const char *ddra_oldname; + const char *ddra_newname; +} dsl_dir_rename_arg_t; +/* ARGSUSED */ static int -dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { - dsl_dir_t *dd = arg1; - struct renamearg *ra = arg2; - dsl_pool_t *dp = dd->dd_pool; - objset_t *mos = dp->dp_meta_objset; - int err; - uint64_t val; + int *deltap = arg; + char namebuf[MAXNAMELEN]; - /* - * There should only be one reference, from dmu_objset_rename(). - * Fleeting holds are also possible (eg, from "zfs list" getting - * stats), but any that are present in open context will likely - * be gone by syncing context, so only fail from syncing - * context. - */ - if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 1) - return (EBUSY); + dsl_dataset_name(ds, namebuf); + + if (strlen(namebuf) + *deltap >= MAXNAMELEN) + return (ENAMETOOLONG); + return (0); +} + +static int +dsl_dir_rename_check(void *arg, dmu_tx_t *tx) +{ + dsl_dir_rename_arg_t *ddra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dir_t *dd, *newparent; + const char *mynewname; + int error; + int delta = strlen(ddra->ddra_newname) - strlen(ddra->ddra_oldname); - /* check for existing name */ - err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, - ra->mynewname, 8, 1, &val); - if (err == 0) + /* target dir should exist */ + error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL); + if (error != 0) + return (error); + + /* new parent should exist */ + error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG, + &newparent, &mynewname); + if (error != 0) { + dsl_dir_rele(dd, FTAG); + return (error); + } + + /* can't rename to different pool */ + if (dd->dd_pool != newparent->dd_pool) { + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); + return (ENXIO); + } + + /* new name should not already exist */ + if (mynewname == NULL) { + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); return (EEXIST); - if (err != ENOENT) - return (err); + } + + /* if the name length is growing, validate child name lengths */ + if (delta > 0) { + error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename, + &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); + if (error != 0) { + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); + return (error); + } + } - if (ra->newparent != dd->dd_parent) { + if (newparent != dd->dd_parent) { /* is there enough space? 
*/ uint64_t myspace = MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved); /* no rename into our descendant */ - if (closest_common_ancestor(dd, ra->newparent) == dd) + if (closest_common_ancestor(dd, newparent) == dd) { + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); return (EINVAL); + } - if ((err = dsl_dir_transfer_possible(dd->dd_parent, - ra->newparent, myspace))) - return (err); + error = dsl_dir_transfer_possible(dd->dd_parent, + newparent, myspace); + if (error != 0) { + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); + return (error); + } } + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); return (0); } static void -dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_dir_rename_sync(void *arg, dmu_tx_t *tx) { - dsl_dir_t *dd = arg1; - struct renamearg *ra = arg2; - dsl_pool_t *dp = dd->dd_pool; + dsl_dir_rename_arg_t *ddra = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dir_t *dd, *newparent; + const char *mynewname; + int error; objset_t *mos = dp->dp_meta_objset; - int err; - char namebuf[MAXNAMELEN]; - ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2); + VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL)); + VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent, + &mynewname)); /* Log this before we change the name. */ - dsl_dir_name(ra->newparent, namebuf); spa_history_log_internal_dd(dd, "rename", tx, - "-> %s/%s", namebuf, ra->mynewname); + "-> %s", ddra->ddra_newname); - if (ra->newparent != dd->dd_parent) { + if (newparent != dd->dd_parent) { dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, -dd->dd_phys->dd_used_bytes, -dd->dd_phys->dd_compressed_bytes, -dd->dd_phys->dd_uncompressed_bytes, tx); - dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD, + dsl_dir_diduse_space(newparent, DD_USED_CHILD, dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_compressed_bytes, dd->dd_phys->dd_uncompressed_bytes, tx); @@ -1326,7 +1267,7 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, -unused_rsrv, 0, 0, tx); - dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV, + dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV, unused_rsrv, 0, 0, tx); } } @@ -1334,52 +1275,36 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) dmu_buf_will_dirty(dd->dd_dbuf, tx); /* remove from old parent zapobj */ - err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, + error = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx); - ASSERT0(err); + ASSERT0(error); - (void) strcpy(dd->dd_myname, ra->mynewname); - dsl_dir_close(dd->dd_parent, dd); - dd->dd_phys->dd_parent_obj = ra->newparent->dd_object; - VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, - ra->newparent->dd_object, NULL, dd, &dd->dd_parent)); + (void) strcpy(dd->dd_myname, mynewname); + dsl_dir_rele(dd->dd_parent, dd); + dd->dd_phys->dd_parent_obj = newparent->dd_object; + VERIFY0(dsl_dir_hold_obj(dp, + newparent->dd_object, NULL, dd, &dd->dd_parent)); /* add to new parent zapobj */ - err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, - dd->dd_myname, 8, 1, &dd->dd_object, tx); - ASSERT0(err); + VERIFY0(zap_add(mos, newparent->dd_phys->dd_child_dir_zapobj, + dd->dd_myname, 8, 1, &dd->dd_object, tx)); + + dsl_prop_notify_all(dd); + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); } int -dsl_dir_rename(dsl_dir_t *dd, const char *newname) +dsl_dir_rename(const char *oldname, const char *newname) { - struct renamearg ra; - int err; + dsl_dir_rename_arg_t ddra; - /* new 
parent should exist */ - err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname); - if (err) - return (err); + ddra.ddra_oldname = oldname; + ddra.ddra_newname = newname; - /* can't rename to different pool */ - if (dd->dd_pool != ra.newparent->dd_pool) { - err = ENXIO; - goto out; - } - - /* new name should not already exist */ - if (ra.mynewname == NULL) { - err = EEXIST; - goto out; - } - - err = dsl_sync_task_do(dd->dd_pool, - dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3); - -out: - dsl_dir_close(ra.newparent, FTAG); - return (err); + return (dsl_sync_task(oldname, + dsl_dir_rename_check, dsl_dir_rename_sync, &ddra, 3)); } int @@ -1424,6 +1349,4 @@ dsl_dir_snap_cmtime_update(dsl_dir_t *dd) #if defined(_KERNEL) && defined(HAVE_SPL) EXPORT_SYMBOL(dsl_dir_set_quota); EXPORT_SYMBOL(dsl_dir_set_reservation); -EXPORT_SYMBOL(dsl_dir_open); -EXPORT_SYMBOL(dsl_dir_close); #endif diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index 7795d8045e92..b59e056bfb57 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -43,6 +43,7 @@ #include #include #include +#include int zfs_no_write_throttle = 0; int zfs_write_limit_shift = 3; /* 1/8th of physical memory */ @@ -264,7 +265,7 @@ dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) if (err) return (err); - return (dsl_dir_open_obj(dp, obj, name, dp, ddp)); + return (dsl_dir_hold_obj(dp, obj, name, dp, ddp)); } static dsl_pool_t * @@ -276,7 +277,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg) dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP); dp->dp_spa = spa; dp->dp_meta_rootbp = *bp; - rw_init(&dp->dp_config_rwlock, NULL, RW_DEFAULT, NULL); + rrw_init(&dp->dp_config_rwlock, B_TRUE); dp->dp_write_limit = zfs_write_limit_min; txg_init(dp, txg); @@ -287,7 +288,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg) txg_list_create(&dp->dp_dirty_dirs, offsetof(dsl_dir_t, dd_dirty_link)); txg_list_create(&dp->dp_sync_tasks, - offsetof(dsl_sync_task_group_t, dstg_node)); + offsetof(dsl_sync_task_t, dst_node)); mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL); @@ -324,14 +325,14 @@ dsl_pool_open(dsl_pool_t *dp) dsl_dataset_t *ds; uint64_t obj; - rw_enter(&dp->dp_config_rwlock, RW_WRITER); + rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &dp->dp_root_dir_obj); if (err) goto out; - err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, + err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir); if (err) goto out; @@ -352,7 +353,7 @@ dsl_pool_open(dsl_pool_t *dp) &dp->dp_origin_snap); dsl_dataset_rele(ds, FTAG); } - dsl_dir_close(dd, dp); + dsl_dir_rele(dd, dp); if (err) goto out; } @@ -367,7 +368,7 @@ dsl_pool_open(dsl_pool_t *dp) DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj); if (err) goto out; - VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj, + VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } @@ -400,7 +401,7 @@ dsl_pool_open(dsl_pool_t *dp) err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg); out: - rw_exit(&dp->dp_config_rwlock); + rrw_exit(&dp->dp_config_rwlock, FTAG); return (err); } @@ -415,13 +416,13 @@ dsl_pool_close(dsl_pool_t *dp) * and not a hold, so just drop that here. 
*/ if (dp->dp_origin_snap) - dsl_dataset_drop_ref(dp->dp_origin_snap, dp); + dsl_dataset_rele(dp->dp_origin_snap, dp); if (dp->dp_mos_dir) - dsl_dir_close(dp->dp_mos_dir, dp); + dsl_dir_rele(dp->dp_mos_dir, dp); if (dp->dp_free_dir) - dsl_dir_close(dp->dp_free_dir, dp); + dsl_dir_rele(dp->dp_free_dir, dp); if (dp->dp_root_dir) - dsl_dir_close(dp->dp_root_dir, dp); + dsl_dir_rele(dp->dp_root_dir, dp); bpobj_close(&dp->dp_free_bpobj); @@ -439,7 +440,7 @@ dsl_pool_close(dsl_pool_t *dp) dsl_scan_fini(dp); dsl_pool_tx_assign_destroy(dp); dsl_pool_txg_history_destroy(dp); - rw_destroy(&dp->dp_config_rwlock); + rrw_destroy(&dp->dp_config_rwlock); mutex_destroy(&dp->dp_lock); taskq_destroy(dp->dp_iput_taskq); if (dp->dp_blkstats) @@ -457,6 +458,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) dsl_dataset_t *ds; uint64_t obj; + rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); + /* create and open the MOS (meta-objset) */ dp->dp_meta_objset = dmu_objset_create_impl(spa, NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); @@ -467,30 +470,30 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) ASSERT0(err); /* Initialize scan structures */ - VERIFY3U(0, ==, dsl_scan_init(dp, txg)); + VERIFY0(dsl_scan_init(dp, txg)); /* create and open the root dir */ dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); - VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj, + VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir)); /* create and open the meta-objset dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); - VERIFY(0 == dsl_pool_open_special_dir(dp, + VERIFY0(dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir)); if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { /* create and open the free dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); - VERIFY(0 == dsl_pool_open_special_dir(dp, + VERIFY0(dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* create and open the free_bplist */ obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx); VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0); - VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj, + VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } @@ -501,7 +504,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); /* create the root objset */ - VERIFY(0 == dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); + VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); VERIFY(NULL != (os = dmu_objset_create_impl(dp->dp_spa, ds, dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx))); #ifdef _KERNEL @@ -511,6 +514,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) dmu_tx_commit(tx); + rrw_exit(&dp->dp_config_rwlock, FTAG); + return (dp); } @@ -533,10 +538,7 @@ static int deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { dsl_deadlist_t *dl = arg; - dsl_pool_t *dp = dmu_objset_pool(dl->dl_os); - rw_enter(&dp->dp_config_rwlock, RW_READER); dsl_deadlist_insert(dl, bp, tx); - rw_exit(&dp->dp_config_rwlock); return (0); } @@ -558,7 +560,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) /* * We need to copy dp_space_towrite() before doing - * dsl_sync_task_group_sync(), because + * dsl_sync_task_sync(), because * dsl_dataset_snapshot_reserve_space() will increase * dp_space_towrite but not actually write anything. 
*/ @@ -673,14 +675,14 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) */ DTRACE_PROBE(pool_sync__3task); if (!txg_list_empty(&dp->dp_sync_tasks, txg)) { - dsl_sync_task_group_t *dstg; + dsl_sync_task_t *dst; /* * No more sync tasks should have been added while we * were syncing. */ ASSERT(spa_sync_pass(dp->dp_spa) == 1); - while ((dstg = txg_list_remove(&dp->dp_sync_tasks, txg))) - dsl_sync_task_group_sync(dstg, tx); + while ((dst = txg_list_remove(&dp->dp_sync_tasks, txg))) + dsl_sync_task_sync(dst, tx); } dmu_tx_commit(tx); @@ -857,14 +859,13 @@ dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) /* ARGSUSED */ static int -upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) +upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { dmu_tx_t *tx = arg; dsl_dataset_t *ds, *prev = NULL; int err; - dsl_pool_t *dp = spa_get_dsl(spa); - err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds); if (err) return (err); @@ -890,7 +891,7 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) * The $ORIGIN can't have any data, or the accounting * will be wrong. */ - ASSERT(prev->ds_phys->ds_bp.blk_birth == 0); + ASSERT0(prev->ds_phys->ds_bp.blk_birth); /* The origin doesn't get attached to itself */ if (ds->ds_object == prev->ds_object) { @@ -910,13 +911,13 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) if (ds->ds_phys->ds_next_snap_obj == 0) { ASSERT(ds->ds_prev == NULL); - VERIFY(0 == dsl_dataset_hold_obj(dp, + VERIFY0(dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); } } - ASSERT(ds->ds_dir->dd_phys->dd_origin_obj == prev->ds_object); - ASSERT(ds->ds_phys->ds_prev_snap_obj == prev->ds_object); + ASSERT3U(ds->ds_dir->dd_phys->dd_origin_obj, ==, prev->ds_object); + ASSERT3U(ds->ds_phys->ds_prev_snap_obj, ==, prev->ds_object); if (prev->ds_phys->ds_next_clones_obj == 0) { dmu_buf_will_dirty(prev->ds_dbuf, tx); @@ -924,7 +925,7 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) zap_create(dp->dp_meta_objset, DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); } - VERIFY(0 == zap_add_int(dp->dp_meta_objset, + VERIFY0(zap_add_int(dp->dp_meta_objset, prev->ds_phys->ds_next_clones_obj, ds->ds_object, tx)); dsl_dataset_rele(ds, FTAG); @@ -939,25 +940,21 @@ dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx) ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dp->dp_origin_snap != NULL); - VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb, + VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_clones_cb, tx, DS_FIND_CHILDREN)); } /* ARGSUSED */ static int -upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) +upgrade_dir_clones_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { dmu_tx_t *tx = arg; - dsl_dataset_t *ds; - dsl_pool_t *dp = spa_get_dsl(spa); objset_t *mos = dp->dp_meta_objset; - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); - - if (ds->ds_dir->dd_phys->dd_origin_obj) { + if (ds->ds_dir->dd_phys->dd_origin_obj != 0) { dsl_dataset_t *origin; - VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, + VERIFY0(dsl_dataset_hold_obj(dp, ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin)); if (origin->ds_dir->dd_phys->dd_clones == 0) { @@ -966,13 +963,11 @@ upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } - VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, - 
origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); + VERIFY0(zap_add_int(dp->dp_meta_objset, + origin->ds_dir->dd_phys->dd_clones, ds->ds_object, tx)); dsl_dataset_rele(origin, FTAG); } - - dsl_dataset_rele(ds, FTAG); return (0); } @@ -984,7 +979,7 @@ dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx) ASSERT(dmu_tx_is_syncing(tx)); (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); - VERIFY(0 == dsl_pool_open_special_dir(dp, + VERIFY0(dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* @@ -994,12 +989,11 @@ dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx) */ obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ, SPA_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx); - VERIFY3U(0, ==, zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx)); - VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj, - dp->dp_meta_objset, obj)); + VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); - VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, + VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN)); } @@ -1011,17 +1005,16 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dp->dp_origin_snap == NULL); + ASSERT(rrw_held(&dp->dp_config_rwlock, RW_WRITER)); /* create the origin dir, ds, & snap-ds */ - rw_enter(&dp->dp_config_rwlock, RW_WRITER); dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME, NULL, 0, kcred, tx); - VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); - dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, tx); - VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, + VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); + dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx); + VERIFY0(dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, dp, &dp->dp_origin_snap)); dsl_dataset_rele(ds, FTAG); - rw_exit(&dp->dp_config_rwlock); } taskq_t * @@ -1056,7 +1049,7 @@ dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp) *htag = '\0'; ++htag; dsobj = strtonum(za.za_name, NULL); - (void) dsl_dataset_user_release_tmp(dp, dsobj, htag, B_FALSE); + dsl_dataset_user_release_tmp(dp, dsobj, htag); } zap_cursor_fini(&zc); } @@ -1078,7 +1071,7 @@ dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx) static int dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj, - const char *tag, uint64_t *now, dmu_tx_t *tx, boolean_t holding) + const char *tag, uint64_t now, dmu_tx_t *tx, boolean_t holding) { objset_t *mos = dp->dp_meta_objset; uint64_t zapobj = dp->dp_tmp_userrefs_obj; @@ -1103,7 +1096,7 @@ dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj, name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag); if (holding) - error = zap_add(mos, zapobj, name, 8, 1, now, tx); + error = zap_add(mos, zapobj, name, 8, 1, &now, tx); else error = zap_remove(mos, zapobj, name, tx); strfree(name); @@ -1116,7 +1109,7 @@ dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj, */ int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag, - uint64_t *now, dmu_tx_t *tx) + uint64_t now, dmu_tx_t *tx) { return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE)); } @@ -1128,10 +1121,113 @@ int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag, dmu_tx_t *tx) { - return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, NULL, + return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, 
0, tx, B_FALSE)); } +/* + * DSL Pool Configuration Lock + * + * The dp_config_rwlock protects against changes to DSL state (e.g. dataset + * creation / destruction / rename / property setting). It must be held for + * read to hold a dataset or dsl_dir. I.e. you must call + * dsl_pool_config_enter() or dsl_pool_hold() before calling + * dsl_{dataset,dir}_hold{_obj}. In most circumstances, the dp_config_rwlock + * must be held continuously until all datasets and dsl_dirs are released. + * + * The only exception to this rule is that if a "long hold" is placed on + * a dataset, then the dp_config_rwlock may be dropped while the dataset + * is still held. The long hold will prevent the dataset from being + * destroyed -- the destroy will fail with EBUSY. A long hold can be + * obtained by calling dsl_dataset_long_hold(), or by "owning" a dataset + * (by calling dsl_{dataset,objset}_{try}own{_obj}). + * + * Legitimate long-holders (including owners) should be long-running, cancelable + * tasks that should cause "zfs destroy" to fail. This includes DMU + * consumers (i.e. a ZPL filesystem being mounted or ZVOL being open), + * "zfs send", and "zfs diff". There are several other long-holders whose + * uses are suboptimal (e.g. "zfs promote", and zil_suspend()). + * + * The usual formula for long-holding would be: + * dsl_pool_hold() + * dsl_dataset_hold() + * ... perform checks ... + * dsl_dataset_long_hold() + * dsl_pool_rele() + * ... perform long-running task ... + * dsl_dataset_long_rele() + * dsl_dataset_rele() + * + * Note that when the long hold is released, the dataset is still held but + * the pool is not held. The dataset may change arbitrarily during this time + * (e.g. it could be destroyed). Therefore you shouldn't do anything to the + * dataset except release it. + * + * User-initiated operations (e.g. ioctls, zfs_ioc_*()) are either read-only + * or modifying operations. + * + * Modifying operations should generally use dsl_sync_task(). The synctask + * infrastructure enforces proper locking strategy with respect to the + * dp_config_rwlock. See the comment above dsl_sync_task() for details. + * + * Read-only operations will manually hold the pool, then the dataset, obtain + * information from the dataset, then release the pool and dataset. + * dmu_objset_{hold,rele}() are convenience routines that also do the pool + * hold/rele. + */ + +int +dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp) +{ + spa_t *spa; + int error; + + error = spa_open(name, &spa, tag); + if (error == 0) { + *dp = spa_get_dsl(spa); + dsl_pool_config_enter(*dp, tag); + } + return (error); +} + +void +dsl_pool_rele(dsl_pool_t *dp, void *tag) +{ + dsl_pool_config_exit(dp, tag); + spa_close(dp->dp_spa, tag); +} + +void +dsl_pool_config_enter(dsl_pool_t *dp, void *tag) +{ + /* + * We use a "reentrant" reader-writer lock, but not reentrantly. + * + * The rrwlock can (with the track_all flag) track all reading threads, + * which is very useful for debugging which code path failed to release + * the lock, and for verifying that the *current* thread does hold + * the lock. + * + * (Unlike a rwlock, which knows that N threads hold it for + * read, but not *which* threads, so rw_held(RW_READER) returns TRUE + * if any thread holds it for read, even if this thread doesn't). 
+ */ + ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER)); + rrw_enter(&dp->dp_config_rwlock, RW_READER, tag); +} + +void +dsl_pool_config_exit(dsl_pool_t *dp, void *tag) +{ + rrw_exit(&dp->dp_config_rwlock, tag); +} + +boolean_t +dsl_pool_config_held(dsl_pool_t *dp) +{ + return (RRW_LOCK_HELD(&dp->dp_config_rwlock)); +} + #if defined(_KERNEL) && defined(HAVE_SPL) module_param(zfs_no_write_throttle, int, 0644); MODULE_PARM_DESC(zfs_no_write_throttle, "Disable write throttling"); diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c index 153420ccf5f1..1d981a7eeaf8 100644 --- a/module/zfs/dsl_prop.c +++ b/module/zfs/dsl_prop.c @@ -82,7 +82,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, char *inheritstr; char *recvdstr; - ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock)); + ASSERT(dsl_pool_config_held(dd->dd_pool)); if (setpoint) setpoint[0] = '\0'; @@ -97,8 +97,6 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, * after this loop. */ for (; dd != NULL; dd = dd->dd_parent) { - ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock)); - if (dd != target || snapshot) { if (!inheritable) break; @@ -167,7 +165,7 @@ dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname, boolean_t snapshot; uint64_t zapobj; - ASSERT(RW_LOCK_HELD(&ds->ds_dir->dd_pool->dp_config_rwlock)); + ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop)); snapshot = (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds)); zapobj = (ds->ds_phys == NULL ? 0 : ds->ds_phys->ds_props_obj); @@ -231,22 +229,16 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname, dsl_prop_changed_cb_t *callback, void *cbarg) { dsl_dir_t *dd = ds->ds_dir; - dsl_pool_t *dp = dd->dd_pool; uint64_t value; dsl_prop_cb_record_t *cbr; int err; - int need_rwlock; + ASSERTV(dsl_pool_t *dp = dd->dd_pool); - need_rwlock = !RW_WRITE_HELD(&dp->dp_config_rwlock); - if (need_rwlock) - rw_enter(&dp->dp_config_rwlock, RW_READER); + ASSERT(dsl_pool_config_held(dp)); - err = dsl_prop_get_ds(ds, propname, 8, 1, &value, NULL); - if (err != 0) { - if (need_rwlock) - rw_exit(&dp->dp_config_rwlock); + err = dsl_prop_get_int_ds(ds, propname, &value); + if (err != 0) return (err); - } cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_PUSHPAGE); cbr->cbr_ds = ds; @@ -259,9 +251,6 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname, mutex_exit(&dd->dd_lock); cbr->cbr_func(cbr->cbr_arg, value); - - if (need_rwlock) - rw_exit(&dp->dp_config_rwlock); return (0); } @@ -269,19 +258,18 @@ int dsl_prop_get(const char *dsname, const char *propname, int intsz, int numints, void *buf, char *setpoint) { - dsl_dataset_t *ds; - int err; + objset_t *os; + int error; - err = dsl_dataset_hold(dsname, FTAG, &ds); - if (err) - return (err); + error = dmu_objset_hold(dsname, FTAG, &os); + if (error != 0) + return (error); - rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); - err = dsl_prop_get_ds(ds, propname, intsz, numints, buf, setpoint); - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); + error = dsl_prop_get_ds(dmu_objset_ds(os), propname, + intsz, numints, buf, setpoint); - dsl_dataset_rele(ds, FTAG); - return (err); + dmu_objset_rele(os, FTAG); + return (error); } /* @@ -299,17 +287,11 @@ dsl_prop_get_integer(const char *ddname, const char *propname, return (dsl_prop_get(ddname, propname, 8, 1, valuep, setpoint)); } -void -dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname, - zprop_source_t source, uint64_t *value) +int +dsl_prop_get_int_ds(dsl_dataset_t 
*ds, const char *propname, + uint64_t *valuep) { - psa->psa_name = propname; - psa->psa_source = source; - psa->psa_intsz = 8; - psa->psa_numints = 1; - psa->psa_value = value; - - psa->psa_effective_value = -1ULL; + return (dsl_prop_get_ds(ds, propname, 8, 1, valuep, NULL)); } /* @@ -323,11 +305,10 @@ dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname, * a property not handled by this function. */ int -dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa) +dsl_prop_predict(dsl_dir_t *dd, const char *propname, + zprop_source_t source, uint64_t value, uint64_t *newvalp) { - const char *propname = psa->psa_name; zfs_prop_t prop = zfs_name_to_prop(propname); - zprop_source_t source = psa->psa_source; objset_t *mos; uint64_t zapobj; uint64_t version; @@ -359,36 +340,33 @@ dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa) switch ((int)source) { case ZPROP_SRC_NONE: /* Revert to the received value, if any. */ - err = zap_lookup(mos, zapobj, recvdstr, 8, 1, - &psa->psa_effective_value); + err = zap_lookup(mos, zapobj, recvdstr, 8, 1, newvalp); if (err == ENOENT) - psa->psa_effective_value = 0; + *newvalp = 0; break; case ZPROP_SRC_LOCAL: - psa->psa_effective_value = *(uint64_t *)psa->psa_value; + *newvalp = value; break; case ZPROP_SRC_RECEIVED: /* * If there's no local setting, then the new received value will * be the effective value. */ - err = zap_lookup(mos, zapobj, propname, 8, 1, - &psa->psa_effective_value); + err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp); if (err == ENOENT) - psa->psa_effective_value = *(uint64_t *)psa->psa_value; + *newvalp = value; break; case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED): /* * We're clearing the received value, so the local setting (if * it exists) remains the effective value. */ - err = zap_lookup(mos, zapobj, propname, 8, 1, - &psa->psa_effective_value); + err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp); if (err == ENOENT) - psa->psa_effective_value = 0; + *newvalp = 0; break; default: - cmn_err(CE_PANIC, "unexpected property source: %d", source); + panic("unexpected property source: %d", source); } strfree(recvdstr); @@ -399,39 +377,6 @@ dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa) return (err); } -#ifdef ZFS_DEBUG -void -dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa) -{ - zfs_prop_t prop = zfs_name_to_prop(psa->psa_name); - uint64_t intval; - char setpoint[MAXNAMELEN]; - uint64_t version = spa_version(dd->dd_pool->dp_spa); - int err; - - if (version < SPA_VERSION_RECVD_PROPS) { - switch (prop) { - case ZFS_PROP_QUOTA: - case ZFS_PROP_RESERVATION: - return; - default: - break; - } - } - - err = dsl_prop_get_dd(dd, psa->psa_name, 8, 1, &intval, - setpoint, B_FALSE); - if (err == 0 && intval != psa->psa_effective_value) { - cmn_err(CE_PANIC, "%s property, source: %x, " - "predicted effective value: %llu, " - "actual effective value: %llu (setpoint: %s)", - psa->psa_name, psa->psa_source, - (unsigned long long)psa->psa_effective_value, - (unsigned long long)intval, setpoint); - } -} -#endif - /* * Unregister this callback. Return 0 on success, ENOENT if ddname is * invalid, ENOMSG if no matching callback registered. @@ -466,25 +411,57 @@ dsl_prop_unregister(dsl_dataset_t *ds, const char *propname, return (0); } -/* - * Return the number of callbacks that are registered for this dataset. 
- */ -int -dsl_prop_numcb(dsl_dataset_t *ds) +boolean_t +dsl_prop_hascb(dsl_dataset_t *ds) { dsl_dir_t *dd = ds->ds_dir; + boolean_t rv = B_FALSE; dsl_prop_cb_record_t *cbr; - int num = 0; mutex_enter(&dd->dd_lock); - for (cbr = list_head(&dd->dd_prop_cbs); - cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) { - if (cbr->cbr_ds == ds) - num++; + for (cbr = list_head(&dd->dd_prop_cbs); cbr; + cbr = list_next(&dd->dd_prop_cbs, cbr)) { + if (cbr->cbr_ds == ds) { + rv = B_TRUE; + break; + } } mutex_exit(&dd->dd_lock); + return (rv); +} - return (num); +/* ARGSUSED */ +static int +dsl_prop_notify_all_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) +{ + dsl_dir_t *dd = ds->ds_dir; + dsl_prop_cb_record_t *cbr; + + mutex_enter(&dd->dd_lock); + for (cbr = list_head(&dd->dd_prop_cbs); cbr; + cbr = list_next(&dd->dd_prop_cbs, cbr)) { + uint64_t value; + + if (dsl_prop_get_ds(cbr->cbr_ds, cbr->cbr_propname, + sizeof (value), 1, &value, NULL) == 0) + cbr->cbr_func(cbr->cbr_arg, value); + } + mutex_exit(&dd->dd_lock); + + return (0); +} + +/* + * Update all property values for ddobj & its descendants. This is used + * when renaming the dir. + */ +void +dsl_prop_notify_all(dsl_dir_t *dd) +{ + dsl_pool_t *dp = dd->dd_pool; + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + (void) dmu_objset_find_dp(dp, dd->dd_object, dsl_prop_notify_all_cb, + NULL, DS_FIND_CHILDREN); } static void @@ -498,8 +475,8 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, zap_attribute_t *za; int err; - ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); - err = dsl_dir_open_obj(dp, ddobj, NULL, FTAG, &dd); + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd); if (err) return; @@ -510,7 +487,7 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, */ err = zap_contains(mos, dd->dd_phys->dd_props_zapobj, propname); if (err == 0) { - dsl_dir_close(dd, FTAG); + dsl_dir_rele(dd, FTAG); return; } ASSERT3U(err, ==, ENOENT); @@ -545,26 +522,24 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, } kmem_free(za, sizeof (zap_attribute_t)); zap_cursor_fini(&zc); - dsl_dir_close(dd, FTAG); + dsl_dir_rele(dd, FTAG); } void -dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, + zprop_source_t source, int intsz, int numints, const void *value, + dmu_tx_t *tx) { - dsl_dataset_t *ds = arg1; - dsl_prop_setarg_t *psa = arg2; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t zapobj, intval, dummy; int isint; char valbuf[32]; - char *valstr = NULL; + const char *valstr = NULL; char *inheritstr; char *recvdstr; char *tbuf = NULL; int err; uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa); - const char *propname = psa->psa_name; - zprop_source_t source = psa->psa_source; isint = (dodefault(propname, 8, 1, &intval) == 0); @@ -614,8 +589,8 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) */ err = zap_remove(mos, zapobj, inheritstr, tx); ASSERT(err == 0 || err == ENOENT); - VERIFY(0 == zap_update(mos, zapobj, propname, - psa->psa_intsz, psa->psa_numints, psa->psa_value, tx)); + VERIFY0(zap_update(mos, zapobj, propname, + intsz, numints, value, tx)); break; case ZPROP_SRC_INHERITED: /* @@ -626,12 +601,10 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) err = zap_remove(mos, zapobj, propname, tx); ASSERT(err == 0 || err == ENOENT); if (version >= SPA_VERSION_RECVD_PROPS && - dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy, - NULL) == 0) { + dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) 
== 0) { dummy = 0; - err = zap_update(mos, zapobj, inheritstr, - 8, 1, &dummy, tx); - ASSERT(err == 0); + VERIFY0(zap_update(mos, zapobj, inheritstr, + 8, 1, &dummy, tx)); } break; case ZPROP_SRC_RECEIVED: @@ -639,7 +612,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) * set propname$recvd -> value */ err = zap_update(mos, zapobj, recvdstr, - psa->psa_intsz, psa->psa_numints, psa->psa_value, tx); + intsz, numints, value, tx); ASSERT(err == 0); break; case (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED): @@ -669,7 +642,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) strfree(recvdstr); if (isint) { - VERIFY(0 == dsl_prop_get_ds(ds, propname, 8, 1, &intval, NULL)); + VERIFY0(dsl_prop_get_int_ds(ds, propname, &intval)); if (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds)) { dsl_prop_cb_record_t *cbr; @@ -696,7 +669,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) valstr = valbuf; } else { if (source == ZPROP_SRC_LOCAL) { - valstr = (char *)psa->psa_value; + valstr = value; } else { tbuf = kmem_alloc(ZAP_MAXVALUELEN, KM_PUSHPAGE); if (dsl_prop_get_ds(ds, propname, 1, @@ -713,118 +686,73 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) kmem_free(tbuf, ZAP_MAXVALUELEN); } -void -dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) +int +dsl_prop_set_int(const char *dsname, const char *propname, + zprop_source_t source, uint64_t value) { - dsl_dataset_t *ds = arg1; - dsl_props_arg_t *pa = arg2; - nvlist_t *props = pa->pa_props; - dsl_prop_setarg_t psa; - nvpair_t *elem = NULL; - - psa.psa_source = pa->pa_source; - - while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { - nvpair_t *pair = elem; - - psa.psa_name = nvpair_name(pair); + nvlist_t *nvl = fnvlist_alloc(); + int error; - if (nvpair_type(pair) == DATA_TYPE_NVLIST) { - /* - * dsl_prop_get_all_impl() returns properties in this - * format. - */ - nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); - VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, - &pair) == 0); - } - - if (nvpair_type(pair) == DATA_TYPE_STRING) { - VERIFY(nvpair_value_string(pair, - (char **)&psa.psa_value) == 0); - psa.psa_intsz = 1; - psa.psa_numints = strlen(psa.psa_value) + 1; - } else { - uint64_t intval; - VERIFY(nvpair_value_uint64(pair, &intval) == 0); - psa.psa_intsz = sizeof (intval); - psa.psa_numints = 1; - psa.psa_value = &intval; - } - dsl_prop_set_sync(ds, &psa, tx); - } + fnvlist_add_uint64(nvl, propname, value); + error = dsl_props_set(dsname, source, nvl); + fnvlist_free(nvl); + return (error); } int -dsl_prop_set(const char *dsname, const char *propname, zprop_source_t source, - int intsz, int numints, const void *buf) +dsl_prop_set_string(const char *dsname, const char *propname, + zprop_source_t source, const char *value) { - dsl_dataset_t *ds; - uint64_t version; - int err; - dsl_prop_setarg_t psa; - - /* - * We must do these checks before we get to the syncfunc, since - * it can't fail. - */ - if (strlen(propname) >= ZAP_MAXNAMELEN) - return (ENAMETOOLONG); - - err = dsl_dataset_hold(dsname, FTAG, &ds); - if (err) - return (err); - - version = spa_version(ds->ds_dir->dd_pool->dp_spa); - if (intsz * numints >= (version < SPA_VERSION_STMF_PROP ? 
- ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) { - dsl_dataset_rele(ds, FTAG); - return (E2BIG); - } - if (dsl_dataset_is_snapshot(ds) && - version < SPA_VERSION_SNAP_PROPS) { - dsl_dataset_rele(ds, FTAG); - return (ENOTSUP); - } + nvlist_t *nvl = fnvlist_alloc(); + int error; - psa.psa_name = propname; - psa.psa_source = source; - psa.psa_intsz = intsz; - psa.psa_numints = numints; - psa.psa_value = buf; - psa.psa_effective_value = -1ULL; + fnvlist_add_string(nvl, propname, value); + error = dsl_props_set(dsname, source, nvl); + fnvlist_free(nvl); + return (error); +} - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - NULL, dsl_prop_set_sync, ds, &psa, 2); +int +dsl_prop_inherit(const char *dsname, const char *propname, + zprop_source_t source) +{ + nvlist_t *nvl = fnvlist_alloc(); + int error; - dsl_dataset_rele(ds, FTAG); - return (err); + fnvlist_add_boolean(nvl, propname); + error = dsl_props_set(dsname, source, nvl); + fnvlist_free(nvl); + return (error); } -int -dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props) +typedef struct dsl_props_set_arg { + const char *dpsa_dsname; + zprop_source_t dpsa_source; + nvlist_t *dpsa_props; +} dsl_props_set_arg_t; + +static int +dsl_props_set_check(void *arg, dmu_tx_t *tx) { + dsl_props_set_arg_t *dpsa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; uint64_t version; nvpair_t *elem = NULL; - dsl_props_arg_t pa; int err; - if ((err = dsl_dataset_hold(dsname, FTAG, &ds))) + err = dsl_dataset_hold(dp, dpsa->dpsa_dsname, FTAG, &ds); + if (err != 0) return (err); - /* - * Do these checks before the syncfunc, since it can't fail. - */ + version = spa_version(ds->ds_dir->dd_pool->dp_spa); - while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { + while ((elem = nvlist_next_nvpair(dpsa->dpsa_props, elem)) != NULL) { if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) { dsl_dataset_rele(ds, FTAG); return (ENAMETOOLONG); } if (nvpair_type(elem) == DATA_TYPE_STRING) { - char *valstr; - VERIFY(nvpair_value_string(elem, &valstr) == 0); + char *valstr = fnvpair_value_string(elem); if (strlen(valstr) >= (version < SPA_VERSION_STMF_PROP ? ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) { @@ -834,20 +762,83 @@ dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props) } } - if (dsl_dataset_is_snapshot(ds) && - version < SPA_VERSION_SNAP_PROPS) { + if (dsl_dataset_is_snapshot(ds) && version < SPA_VERSION_SNAP_PROPS) { dsl_dataset_rele(ds, FTAG); return (ENOTSUP); } + dsl_dataset_rele(ds, FTAG); + return (0); +} + +void +dsl_props_set_sync_impl(dsl_dataset_t *ds, zprop_source_t source, + nvlist_t *props, dmu_tx_t *tx) +{ + nvpair_t *elem = NULL; + + while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { + nvpair_t *pair = elem; - pa.pa_props = props; - pa.pa_source = source; + if (nvpair_type(pair) == DATA_TYPE_NVLIST) { + /* + * dsl_prop_get_all_impl() returns properties in this + * format. 
+ */ + nvlist_t *attrs = fnvpair_value_nvlist(pair); + pair = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE); + } + + if (nvpair_type(pair) == DATA_TYPE_STRING) { + const char *value = fnvpair_value_string(pair); + dsl_prop_set_sync_impl(ds, nvpair_name(pair), + source, 1, strlen(value) + 1, value, tx); + } else if (nvpair_type(pair) == DATA_TYPE_UINT64) { + uint64_t intval = fnvpair_value_uint64(pair); + dsl_prop_set_sync_impl(ds, nvpair_name(pair), + source, sizeof (intval), 1, &intval, tx); + } else if (nvpair_type(pair) == DATA_TYPE_BOOLEAN) { + dsl_prop_set_sync_impl(ds, nvpair_name(pair), + source, 0, 0, NULL, tx); + } else { + panic("invalid nvpair type"); + } + } +} - err = dsl_sync_task_do(ds->ds_dir->dd_pool, - NULL, dsl_props_set_sync, ds, &pa, 2); +static void +dsl_props_set_sync(void *arg, dmu_tx_t *tx) +{ + dsl_props_set_arg_t *dpsa = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + VERIFY0(dsl_dataset_hold(dp, dpsa->dpsa_dsname, FTAG, &ds)); + dsl_props_set_sync_impl(ds, dpsa->dpsa_source, dpsa->dpsa_props, tx); dsl_dataset_rele(ds, FTAG); - return (err); +} + +/* + * All-or-nothing; if any prop can't be set, nothing will be modified. + */ +int +dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props) +{ + dsl_props_set_arg_t dpsa; + int nblks = 0; + + dpsa.dpsa_dsname = dsname; + dpsa.dpsa_source = source; + dpsa.dpsa_props = props; + + /* + * If the source includes NONE, then we will only be removing entries + * from the ZAP object. In that case don't check for ENOSPC. + */ + if ((source & ZPROP_SRC_NONE) == 0) + nblks = 2 * fnvlist_num_pairs(props); + + return (dsl_sync_task(dsname, dsl_props_set_check, dsl_props_set_sync, + &dpsa, nblks)); } typedef enum dsl_prop_getflags { @@ -997,7 +988,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp, if (dsl_dataset_is_snapshot(ds)) flags |= DSL_PROP_GET_SNAPSHOT; - rw_enter(&dp->dp_config_rwlock, RW_READER); + ASSERT(dsl_pool_config_held(dp)); if (ds->ds_phys->ds_props_obj != 0) { ASSERT(flags & DSL_PROP_GET_SNAPSHOT); @@ -1022,58 +1013,51 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp, break; } out: - rw_exit(&dp->dp_config_rwlock); return (err); } boolean_t -dsl_prop_get_hasrecvd(objset_t *os) +dsl_prop_get_hasrecvd(const char *dsname) { - dsl_dataset_t *ds = os->os_dsl_dataset; - int rc; uint64_t dummy; - rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); - rc = dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy, NULL); - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); - ASSERT(rc != 0 || spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS); - return (rc == 0); + return (0 == + dsl_prop_get_integer(dsname, ZPROP_HAS_RECVD, &dummy, NULL)); } -static void -dsl_prop_set_hasrecvd_impl(objset_t *os, zprop_source_t source) +static int +dsl_prop_set_hasrecvd_impl(const char *dsname, zprop_source_t source) { - dsl_dataset_t *ds = os->os_dsl_dataset; - uint64_t dummy = 0; - dsl_prop_setarg_t psa; - - if (spa_version(os->os_spa) < SPA_VERSION_RECVD_PROPS) - return; + uint64_t version; + spa_t *spa; + int error = 0; - dsl_prop_setarg_init_uint64(&psa, ZPROP_HAS_RECVD, source, &dummy); + VERIFY0(spa_open(dsname, &spa, FTAG)); + version = spa_version(spa); + spa_close(spa, FTAG); - (void) dsl_sync_task_do(ds->ds_dir->dd_pool, NULL, - dsl_prop_set_sync, ds, &psa, 2); + if (version >= SPA_VERSION_RECVD_PROPS) + error = dsl_prop_set_int(dsname, ZPROP_HAS_RECVD, source, 0); + return (error); } /* * Call after successfully receiving properties to ensure that only the first * receive on or 
after SPA_VERSION_RECVD_PROPS blows away local properties. */ -void -dsl_prop_set_hasrecvd(objset_t *os) +int +dsl_prop_set_hasrecvd(const char *dsname) { - if (dsl_prop_get_hasrecvd(os)) { - ASSERT(spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS); - return; - } - dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_LOCAL); + int error = 0; + if (!dsl_prop_get_hasrecvd(dsname)) + error = dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_LOCAL); + return (error); } void -dsl_prop_unset_hasrecvd(objset_t *os) +dsl_prop_unset_hasrecvd(const char *dsname) { - dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_NONE); + VERIFY0(dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_NONE)); } int @@ -1083,16 +1067,25 @@ dsl_prop_get_all(objset_t *os, nvlist_t **nvp) } int -dsl_prop_get_received(objset_t *os, nvlist_t **nvp) +dsl_prop_get_received(const char *dsname, nvlist_t **nvp) { + objset_t *os; + int error; + /* * Received properties are not distinguishable from local properties * until the dataset has received properties on or after * SPA_VERSION_RECVD_PROPS. */ - dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(os) ? + dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(dsname) ? DSL_PROP_GET_RECEIVED : DSL_PROP_GET_LOCAL); - return (dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags)); + + error = dmu_objset_hold(dsname, FTAG, &os); + if (error != 0) + return (error); + error = dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags); + dmu_objset_rele(os, FTAG); + return (error); } void @@ -1138,8 +1131,6 @@ dsl_prop_nvlist_add_string(nvlist_t *nv, zfs_prop_t prop, const char *value) #if defined(_KERNEL) && defined(HAVE_SPL) EXPORT_SYMBOL(dsl_prop_register); EXPORT_SYMBOL(dsl_prop_unregister); -EXPORT_SYMBOL(dsl_prop_numcb); -EXPORT_SYMBOL(dsl_prop_set); EXPORT_SYMBOL(dsl_prop_get); EXPORT_SYMBOL(dsl_prop_get_integer); EXPORT_SYMBOL(dsl_prop_get_all); diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 90ca7b256606..2e5034bdffdc 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -53,7 +53,7 @@ typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *); static scan_cb_t dsl_scan_scrub_cb; -static dsl_syncfunc_t dsl_scan_cancel_sync; +static void dsl_scan_cancel_sync(void *, dmu_tx_t *); static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx); int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */ @@ -150,9 +150,9 @@ dsl_scan_fini(dsl_pool_t *dp) /* ARGSUSED */ static int -dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_scan_setup_check(void *arg, dmu_tx_t *tx) { - dsl_scan_t *scn = arg1; + dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; if (scn->scn_phys.scn_state == DSS_SCANNING) return (EBUSY); @@ -160,12 +160,11 @@ dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx) return (0); } -/* ARGSUSED */ static void -dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) { - dsl_scan_t *scn = arg1; - pool_scan_func_t *funcp = arg2; + dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; + pool_scan_func_t *funcp = arg; dmu_object_type_t ot = 0; dsl_pool_t *dp = scn->scn_dp; spa_t *spa = dp->dp_spa; @@ -312,9 +311,9 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) /* ARGSUSED */ static int -dsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_scan_cancel_check(void *arg, dmu_tx_t *tx) { - dsl_scan_t *scn = arg1; + dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; if (scn->scn_phys.scn_state != DSS_SCANNING) return (ENOENT); @@ -323,9 +322,9 @@ dsl_scan_cancel_check(void *arg1, void *arg2, 
dmu_tx_t *tx) /* ARGSUSED */ static void -dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_scan_cancel_sync(void *arg, dmu_tx_t *tx) { - dsl_scan_t *scn = arg1; + dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; dsl_scan_done(scn, B_FALSE, tx); dsl_scan_sync_state(scn, tx); @@ -334,12 +333,8 @@ dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx) int dsl_scan_cancel(dsl_pool_t *dp) { - boolean_t complete = B_FALSE; - int err; - - err = dsl_sync_task_do(dp, dsl_scan_cancel_check, - dsl_scan_cancel_sync, dp->dp_scan, &complete, 3); - return (err); + return (dsl_sync_task(spa_name(dp->dp_spa), dsl_scan_cancel_check, + dsl_scan_cancel_sync, NULL, 3)); } static void dsl_scan_visitbp(blkptr_t *bp, @@ -375,7 +370,7 @@ dsl_scan_ds_maxtxg(dsl_dataset_t *ds) static void dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx) { - VERIFY(0 == zap_update(scn->scn_dp->dp_meta_objset, + VERIFY0(zap_update(scn->scn_dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, &scn->scn_phys, tx)); @@ -959,33 +954,33 @@ struct enqueue_clones_arg { /* ARGSUSED */ static int -enqueue_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) +enqueue_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { struct enqueue_clones_arg *eca = arg; dsl_dataset_t *ds; int err; - dsl_pool_t *dp = spa->spa_dsl_pool; dsl_scan_t *scn = dp->dp_scan; - err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + if (hds->ds_dir->dd_phys->dd_origin_obj != eca->originobj) + return (0); + + err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds); if (err) return (err); - if (ds->ds_dir->dd_phys->dd_origin_obj == eca->originobj) { - while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) { - dsl_dataset_t *prev; - err = dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); + while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) { + dsl_dataset_t *prev; + err = dsl_dataset_hold_obj(dp, + ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); - dsl_dataset_rele(ds, FTAG); - if (err) - return (err); - ds = prev; - } - VERIFY(zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, ds->ds_object, - ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0); + dsl_dataset_rele(ds, FTAG); + if (err) + return (err); + ds = prev; } + VERIFY(zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds->ds_object, + ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0); dsl_dataset_rele(ds, FTAG); return (0); } @@ -1075,17 +1070,17 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) } if (usenext) { - VERIFY(zap_join_key(dp->dp_meta_objset, + VERIFY0(zap_join_key(dp->dp_meta_objset, ds->ds_phys->ds_next_clones_obj, scn->scn_phys.scn_queue_obj, - ds->ds_phys->ds_creation_txg, tx) == 0); + ds->ds_phys->ds_creation_txg, tx)); } else { struct enqueue_clones_arg eca; eca.tx = tx; eca.originobj = ds->ds_object; - (void) dmu_objset_find_spa(ds->ds_dir->dd_pool->dp_spa, - NULL, enqueue_clones_cb, &eca, DS_FIND_CHILDREN); + VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, + enqueue_clones_cb, &eca, DS_FIND_CHILDREN)); } } @@ -1095,15 +1090,14 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) /* ARGSUSED */ static int -enqueue_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) +enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { dmu_tx_t *tx = arg; dsl_dataset_t *ds; int err; - dsl_pool_t *dp = spa->spa_dsl_pool; dsl_scan_t *scn = dp->dp_scan; - err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + err = dsl_dataset_hold_obj(dp, 
hds->ds_object, FTAG, &ds); if (err) return (err); @@ -1261,8 +1255,8 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) return; if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) { - VERIFY(0 == dmu_objset_find_spa(dp->dp_spa, - NULL, enqueue_cb, tx, DS_FIND_CHILDREN)); + VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, + enqueue_cb, tx, DS_FIND_CHILDREN)); } else { dsl_scan_visitds(scn, dp->dp_origin_snap->ds_object, tx); @@ -1402,7 +1396,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) func = POOL_SCAN_RESILVER; zfs_dbgmsg("restarting scan func=%u txg=%llu", func, tx->tx_txg); - dsl_scan_setup_sync(scn, &func, tx); + dsl_scan_setup_sync(&func, tx); } if (!dsl_scan_active(scn) || @@ -1436,21 +1430,21 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) err = bptree_iterate(dp->dp_meta_objset, dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb, scn, tx); - VERIFY3U(0, ==, zio_wait(scn->scn_zio_root)); - if (err != 0) - return; - - /* disable async destroy feature */ - spa_feature_decr(spa, - &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY], tx); - ASSERT(!spa_feature_is_active(spa, - &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])); - VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset, - DMU_POOL_DIRECTORY_OBJECT, - DMU_POOL_BPTREE_OBJ, tx)); - VERIFY3U(0, ==, bptree_free(dp->dp_meta_objset, - dp->dp_bptree_obj, tx)); - dp->dp_bptree_obj = 0; + VERIFY0(zio_wait(scn->scn_zio_root)); + + if (err == 0) { + zfeature_info_t *feat = &spa_feature_table + [SPA_FEATURE_ASYNC_DESTROY]; + /* finished; deactivate async destroy feature */ + spa_feature_decr(spa, feat, tx); + ASSERT(!spa_feature_is_active(spa, feat)); + VERIFY0(zap_remove(dp->dp_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_BPTREE_OBJ, tx)); + VERIFY0(bptree_free(dp->dp_meta_objset, + dp->dp_bptree_obj, tx)); + dp->dp_bptree_obj = 0; + } } if (scn->scn_visited_this_txg) { zfs_dbgmsg("freed %llu blocks in %llums from " @@ -1497,7 +1491,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) scn->scn_zio_root = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_CANFAIL); + dsl_pool_config_enter(dp, FTAG); dsl_scan_visit(scn, tx); + dsl_pool_config_exit(dp, FTAG); (void) zio_wait(scn->scn_zio_root); scn->scn_zio_root = NULL; @@ -1734,8 +1730,8 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) spa->spa_scrub_reopen = B_FALSE; (void) spa_vdev_state_exit(spa, NULL, 0); - return (dsl_sync_task_do(dp, dsl_scan_setup_check, - dsl_scan_setup_sync, dp->dp_scan, &func, 0)); + return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check, + dsl_scan_setup_sync, &func, 0)); } #if defined(_KERNEL) && defined(HAVE_SPL) diff --git a/module/zfs/dsl_synctask.c b/module/zfs/dsl_synctask.c index 2ed47fe0c95c..6eb712314db0 100644 --- a/module/zfs/dsl_synctask.c +++ b/module/zfs/dsl_synctask.c @@ -34,138 +34,115 @@ /* ARGSUSED */ static int -dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx) +dsl_null_checkfunc(void *arg, dmu_tx_t *tx) { return (0); } -dsl_sync_task_group_t * -dsl_sync_task_group_create(dsl_pool_t *dp) -{ - dsl_sync_task_group_t *dstg; - - dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP); - list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t), - offsetof(dsl_sync_task_t, dst_node)); - dstg->dstg_pool = dp; - - return (dstg); -} - -void -dsl_sync_task_create(dsl_sync_task_group_t *dstg, - dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, - void *arg1, void *arg2, int blocks_modified) -{ - dsl_sync_task_t *dst; - - if (checkfunc == NULL) - checkfunc = dsl_null_checkfunc; - dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP); - 
dst->dst_checkfunc = checkfunc; - dst->dst_syncfunc = syncfunc; - dst->dst_arg1 = arg1; - dst->dst_arg2 = arg2; - list_insert_tail(&dstg->dstg_tasks, dst); - - dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT; -} - +/* + * Called from open context to perform a callback in syncing context. Waits + * for the operation to complete. + * + * The checkfunc will be called from open context as a preliminary check + * which can quickly fail. If it succeeds, it will be called again from + * syncing context. The checkfunc should generally be designed to work + * properly in either context, but if necessary it can check + * dmu_tx_is_syncing(tx). + * + * The synctask infrastructure enforces proper locking strategy with respect + * to the dp_config_rwlock -- the lock will always be held when the callbacks + * are called. It will be held for read during the open-context (preliminary) + * call to the checkfunc, and then held for write from syncing context during + * the calls to the check and sync funcs. + * + * A dataset or pool name can be passed as the first argument. Typically, + * the check func will hold, check the return value of the hold, and then + * release the dataset. The sync func will VERIFY0(hold()) the dataset. + * This is safe because no changes can be made between the check and sync funcs, + * and the sync func will only be called if the check func successfully opened + * the dataset. + */ int -dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg) +dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc, + dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified) { + spa_t *spa; dmu_tx_t *tx; - uint64_t txg; - dsl_sync_task_t *dst; - -top: - tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir); - VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT)); - - txg = dmu_tx_get_txg(tx); + int err; + dsl_sync_task_t dst = { { { NULL } } }; + dsl_pool_t *dp; - /* Do a preliminary error check. */ - dstg->dstg_err = 0; -#ifdef ZFS_DEBUG - /* - * Only check half the time, otherwise, the sync-context - * check will almost never fail. - */ - if (spa_get_random(2) == 0) - goto skip; -#endif - rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER); - for (dst = list_head(&dstg->dstg_tasks); dst; - dst = list_next(&dstg->dstg_tasks, dst)) { - dst->dst_err = - dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx); - if (dst->dst_err) - dstg->dstg_err = dst->dst_err; - } - rw_exit(&dstg->dstg_pool->dp_config_rwlock); + err = spa_open(pool, &spa, FTAG); + if (err != 0) + return (err); + dp = spa_get_dsl(spa); - if (dstg->dstg_err) { +top: + tx = dmu_tx_create_dd(dp->dp_mos_dir); + VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); + + dst.dst_pool = dp; + dst.dst_txg = dmu_tx_get_txg(tx); + dst.dst_space = blocks_modified << DST_AVG_BLKSHIFT; + dst.dst_checkfunc = checkfunc != NULL ? checkfunc : dsl_null_checkfunc; + dst.dst_syncfunc = syncfunc; + dst.dst_arg = arg; + dst.dst_error = 0; + dst.dst_nowaiter = B_FALSE; + + dsl_pool_config_enter(dp, FTAG); + err = dst.dst_checkfunc(arg, tx); + dsl_pool_config_exit(dp, FTAG); + + if (err != 0) { dmu_tx_commit(tx); - return (dstg->dstg_err); + spa_close(spa, FTAG); + return (err); } -#ifdef ZFS_DEBUG -skip: -#endif - /* - * We don't generally have many sync tasks, so pay the price of - * add_tail to get the tasks executed in the right order.
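/*
 * Editorial sketch (not part of the patch): a minimal caller of the new
 * single-argument synctask interface described above. The argument struct
 * and the my_task_* names are hypothetical; only dsl_sync_task(),
 * dsl_dataset_hold()/rele() and dmu_tx_pool() come from this change.
 */
typedef struct my_task_arg {
	const char *mta_snapname;
} my_task_arg_t;

static int
my_task_check(void *arg, dmu_tx_t *tx)
{
	my_task_arg_t *mta = arg;
	dsl_dataset_t *ds;
	int error;

	/* Runs once in open context and again in syncing context. */
	error = dsl_dataset_hold(dmu_tx_pool(tx), mta->mta_snapname,
	    FTAG, &ds);
	if (error != 0)
		return (error);
	dsl_dataset_rele(ds, FTAG);
	return (0);
}

static void
my_task_sync(void *arg, dmu_tx_t *tx)
{
	my_task_arg_t *mta = arg;
	dsl_dataset_t *ds;

	/* Safe: nothing can change between the check and sync calls. */
	VERIFY0(dsl_dataset_hold(dmu_tx_pool(tx), mta->mta_snapname,
	    FTAG, &ds));
	/* ... dirty and modify on-disk state here ... */
	dsl_dataset_rele(ds, FTAG);
}

/* Open-context invocation; waits for the txg to sync: */
/* error = dsl_sync_task("tank", my_task_check, my_task_sync, &arg, 1); */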
- */ - VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks, - dstg, txg)); + VERIFY(txg_list_add_tail(&dp->dp_sync_tasks, &dst, dst.dst_txg)); dmu_tx_commit(tx); - txg_wait_synced(dstg->dstg_pool, txg); + txg_wait_synced(dp, dst.dst_txg); - if (dstg->dstg_err == EAGAIN) { - txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE); + if (dst.dst_error == EAGAIN) { + txg_wait_synced(dp, dst.dst_txg + TXG_DEFER_SIZE); goto top; } - return (dstg->dstg_err); + spa_close(spa, FTAG); + return (dst.dst_error); } void -dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx) +dsl_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg, + int blocks_modified, dmu_tx_t *tx) { - uint64_t txg; + dsl_sync_task_t *dst = kmem_zalloc(sizeof (*dst), KM_SLEEP); - dstg->dstg_nowaiter = B_TRUE; - txg = dmu_tx_get_txg(tx); - /* - * We don't generally have many sync tasks, so pay the price of - * add_tail to get the tasks executed in the right order. - */ - VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks, - dstg, txg)); -} - -void -dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg) -{ - dsl_sync_task_t *dst; + dst->dst_pool = dp; + dst->dst_txg = dmu_tx_get_txg(tx); + dst->dst_space = blocks_modified << DST_AVG_BLKSHIFT; + dst->dst_checkfunc = dsl_null_checkfunc; + dst->dst_syncfunc = syncfunc; + dst->dst_arg = arg; + dst->dst_error = 0; + dst->dst_nowaiter = B_TRUE; - while ((dst = list_head(&dstg->dstg_tasks))) { - list_remove(&dstg->dstg_tasks, dst); - kmem_free(dst, sizeof (dsl_sync_task_t)); - } - kmem_free(dstg, sizeof (dsl_sync_task_group_t)); + VERIFY(txg_list_add_tail(&dp->dp_sync_tasks, dst, dst->dst_txg)); } +/* + * Called in syncing context to execute the synctask. + */ void -dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx) +dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx) { - dsl_sync_task_t *dst; - dsl_pool_t *dp = dstg->dstg_pool; + dsl_pool_t *dp = dst->dst_pool; uint64_t quota, used; - ASSERT0(dstg->dstg_err); + ASSERT0(dst->dst_error); /* * Check for sufficient space. We just check against what's @@ -177,70 +154,24 @@ dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx) metaslab_class_get_deferred(spa_normal_class(dp->dp_spa)); used = dp->dp_root_dir->dd_phys->dd_used_bytes; /* MOS space is triple-dittoed, so we multiply by 3. */ - if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) { - dstg->dstg_err = ENOSPC; + if (dst->dst_space > 0 && used + dst->dst_space * 3 > quota) { + dst->dst_error = ENOSPC; + if (dst->dst_nowaiter) + kmem_free(dst, sizeof (*dst)); return; } /* - * Check for errors by calling checkfuncs. + * Check for errors by calling checkfunc. */ - rw_enter(&dp->dp_config_rwlock, RW_WRITER); - for (dst = list_head(&dstg->dstg_tasks); dst; - dst = list_next(&dstg->dstg_tasks, dst)) { - dst->dst_err = - dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx); - if (dst->dst_err) - dstg->dstg_err = dst->dst_err; - } - - if (dstg->dstg_err == 0) { - /* - * Execute sync tasks. 
- */ - for (dst = list_head(&dstg->dstg_tasks); dst; - dst = list_next(&dstg->dstg_tasks, dst)) { - dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx); - } - } - rw_exit(&dp->dp_config_rwlock); - - if (dstg->dstg_nowaiter) - dsl_sync_task_group_destroy(dstg); -} - -int -dsl_sync_task_do(dsl_pool_t *dp, - dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, - void *arg1, void *arg2, int blocks_modified) -{ - dsl_sync_task_group_t *dstg; - int err; - - ASSERT(spa_writeable(dp->dp_spa)); - - dstg = dsl_sync_task_group_create(dp); - dsl_sync_task_create(dstg, checkfunc, syncfunc, - arg1, arg2, blocks_modified); - err = dsl_sync_task_group_wait(dstg); - dsl_sync_task_group_destroy(dstg); - return (err); -} - -void -dsl_sync_task_do_nowait(dsl_pool_t *dp, - dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, - void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx) -{ - dsl_sync_task_group_t *dstg; - - dstg = dsl_sync_task_group_create(dp); - dsl_sync_task_create(dstg, checkfunc, syncfunc, - arg1, arg2, blocks_modified); - dsl_sync_task_group_nowait(dstg, tx); + rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); + dst->dst_error = dst->dst_checkfunc(dst->dst_arg, tx); + if (dst->dst_error == 0) + dst->dst_syncfunc(dst->dst_arg, tx); + rrw_exit(&dp->dp_config_rwlock, FTAG); + if (dst->dst_nowaiter) + kmem_free(dst, sizeof (*dst)); } #if defined(_KERNEL) && defined(HAVE_SPL) -EXPORT_SYMBOL(dsl_sync_task_do); -EXPORT_SYMBOL(dsl_sync_task_do_nowait); #endif diff --git a/module/zfs/dsl_userhold.c b/module/zfs/dsl_userhold.c new file mode 100644 index 000000000000..c8bc4424f857 --- /dev/null +++ b/module/zfs/dsl_userhold.c @@ -0,0 +1,537 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct dsl_dataset_user_hold_arg { + nvlist_t *dduha_holds; + nvlist_t *dduha_errlist; + minor_t dduha_minor; +} dsl_dataset_user_hold_arg_t; + +/* + * If you add new checks here, you may need to add additional checks to the + * "temporary" case in snapshot_check() in dmu_objset.c. 
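/*
 * Editorial note (illustrative, not part of the patch): check_one is invoked
 * once per requested hold, and a nonzero cleanup minor marks the hold as
 * temporary, which reserves room for the temporary-tag prefix and therefore
 * tightens the length check:
 *
 *	error = dsl_dataset_user_hold_check_one(ds, htag,
 *	    dduha->dduha_minor != 0, tx);
 */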
+ */ +int +dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag, + boolean_t temphold, dmu_tx_t *tx) +{ + dsl_pool_t *dp = dmu_tx_pool(tx); + objset_t *mos = dp->dp_meta_objset; + int error = 0; + + if (strlen(htag) > MAXNAMELEN) + return (E2BIG); + /* Tempholds have a more restricted length */ + if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) + return (E2BIG); + + /* tags must be unique (if ds already exists) */ + if (ds != NULL) { + mutex_enter(&ds->ds_lock); + if (ds->ds_phys->ds_userrefs_obj != 0) { + uint64_t value; + error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, + htag, 8, 1, &value); + if (error == 0) + error = EEXIST; + else if (error == ENOENT) + error = 0; + } + mutex_exit(&ds->ds_lock); + } + + return (error); +} + +static int +dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_user_hold_arg_t *dduha = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + int rv = 0; + + if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) + return (ENOTSUP); + + for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) { + int error = 0; + dsl_dataset_t *ds; + char *htag; + + /* must be a snapshot */ + if (strchr(nvpair_name(pair), '@') == NULL) + error = EINVAL; + + if (error == 0) + error = nvpair_value_string(pair, &htag); + if (error == 0) { + error = dsl_dataset_hold(dp, + nvpair_name(pair), FTAG, &ds); + } + if (error == 0) { + error = dsl_dataset_user_hold_check_one(ds, htag, + dduha->dduha_minor != 0, tx); + dsl_dataset_rele(ds, FTAG); + } + + if (error != 0) { + rv = error; + fnvlist_add_int32(dduha->dduha_errlist, + nvpair_name(pair), error); + } + } + return (rv); +} + +void +dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag, + minor_t minor, uint64_t now, dmu_tx_t *tx) +{ + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + uint64_t zapobj; + + mutex_enter(&ds->ds_lock); + if (ds->ds_phys->ds_userrefs_obj == 0) { + /* + * This is the first user hold for this dataset. Create + * the userrefs zap object. + */ + dmu_buf_will_dirty(ds->ds_dbuf, tx); + zapobj = ds->ds_phys->ds_userrefs_obj = + zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx); + } else { + zapobj = ds->ds_phys->ds_userrefs_obj; + } + ds->ds_userrefs++; + mutex_exit(&ds->ds_lock); + + VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx)); + + if (minor != 0) { + VERIFY0(dsl_pool_user_hold(dp, ds->ds_object, + htag, now, tx)); + dsl_register_onexit_hold_cleanup(ds, htag, minor); + } + + spa_history_log_internal_ds(ds, "hold", tx, + "tag=%s temp=%d refs=%llu", + htag, minor != 0, ds->ds_userrefs); +} + +static void +dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_user_hold_arg_t *dduha = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + uint64_t now = gethrestime_sec(); + + for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) { + dsl_dataset_t *ds; + VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds)); + dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair), + dduha->dduha_minor, now, tx); + dsl_dataset_rele(ds, FTAG); + } +} + +/* + * holds is nvl of snapname -> holdname + * errlist will be filled in with snapname -> error + * if cleanup_minor is not 0, the holds will be temporary, cleaned up + * when the process exits. + * + * if any fails, all will fail. 
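/*
 * Editorial sketch (assumed caller, not part of the patch): the shape of the
 * nvlists consumed by dsl_dataset_user_hold() below -- snapshot name mapped
 * to hold tag, with per-snapshot errors reported back through errlist.
 */
static int
example_hold_two_snaps(void)
{
	nvlist_t *holds = fnvlist_alloc();
	nvlist_t *errlist = fnvlist_alloc();
	int error;

	fnvlist_add_string(holds, "tank/fs@monday", "backup-job");
	fnvlist_add_string(holds, "tank/fs@tuesday", "backup-job");

	/* cleanup_minor == 0: permanent holds, released explicitly later. */
	error = dsl_dataset_user_hold(holds, 0, errlist);

	fnvlist_free(errlist);
	fnvlist_free(holds);
	return (error);
}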
+ */ +int +dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist) +{ + dsl_dataset_user_hold_arg_t dduha; + nvpair_t *pair; + + pair = nvlist_next_nvpair(holds, NULL); + if (pair == NULL) + return (0); + + dduha.dduha_holds = holds; + dduha.dduha_errlist = errlist; + dduha.dduha_minor = cleanup_minor; + + return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check, + dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds))); +} + +typedef struct dsl_dataset_user_release_arg { + nvlist_t *ddura_holds; + nvlist_t *ddura_todelete; + nvlist_t *ddura_errlist; +} dsl_dataset_user_release_arg_t; + +static int +dsl_dataset_user_release_check_one(dsl_dataset_t *ds, + nvlist_t *holds, boolean_t *todelete) +{ + uint64_t zapobj; + nvpair_t *pair; + objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; + int error; + int numholds = 0; + + *todelete = B_FALSE; + + if (!dsl_dataset_is_snapshot(ds)) + return (EINVAL); + + zapobj = ds->ds_phys->ds_userrefs_obj; + if (zapobj == 0) + return (ESRCH); + + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(holds, pair)) { + /* Make sure the hold exists */ + uint64_t tmp; + error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp); + if (error == ENOENT) + error = ESRCH; + if (error != 0) + return (error); + numholds++; + } + + if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 && + ds->ds_userrefs == numholds) { + /* we need to destroy the snapshot as well */ + + if (dsl_dataset_long_held(ds)) + return (EBUSY); + *todelete = B_TRUE; + } + return (0); +} + +static int +dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_user_release_arg_t *ddura = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + int rv = 0; + + if (!dmu_tx_is_syncing(tx)) + return (0); + + for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) { + const char *name = nvpair_name(pair); + int error; + dsl_dataset_t *ds; + nvlist_t *holds; + + error = nvpair_value_nvlist(pair, &holds); + if (error != 0) + return (EINVAL); + + error = dsl_dataset_hold(dp, name, FTAG, &ds); + if (error == 0) { + boolean_t deleteme; + error = dsl_dataset_user_release_check_one(ds, + holds, &deleteme); + if (error == 0 && deleteme) { + fnvlist_add_boolean(ddura->ddura_todelete, + name); + } + dsl_dataset_rele(ds, FTAG); + } + if (error != 0) { + if (ddura->ddura_errlist != NULL) { + fnvlist_add_int32(ddura->ddura_errlist, + name, error); + } + rv = error; + } + } + return (rv); +} + +static void +dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds, + dmu_tx_t *tx) +{ + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + uint64_t zapobj; + int error; + nvpair_t *pair; + + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(holds, pair)) { + ds->ds_userrefs--; + error = dsl_pool_user_release(dp, ds->ds_object, + nvpair_name(pair), tx); + VERIFY(error == 0 || error == ENOENT); + zapobj = ds->ds_phys->ds_userrefs_obj; + VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx)); + + spa_history_log_internal_ds(ds, "release", tx, + "tag=%s refs=%lld", nvpair_name(pair), + (longlong_t)ds->ds_userrefs); + } +} + +static void +dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_user_release_arg_t *ddura = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + nvpair_t *pair; + + for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL; 
+ pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) { + dsl_dataset_t *ds; + + VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds)); + dsl_dataset_user_release_sync_one(ds, + fnvpair_value_nvlist(pair), tx); + if (nvlist_exists(ddura->ddura_todelete, + nvpair_name(pair))) { + ASSERT(ds->ds_userrefs == 0 && + ds->ds_phys->ds_num_children == 1 && + DS_IS_DEFER_DESTROY(ds)); + dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx); + } + dsl_dataset_rele(ds, FTAG); + } +} + +/* + * holds is nvl of snapname -> { holdname, ... } + * errlist will be filled in with snapname -> error + * + * if any fails, all will fail. + */ +int +dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist) +{ + dsl_dataset_user_release_arg_t ddura; + nvpair_t *pair; + int error; + + pair = nvlist_next_nvpair(holds, NULL); + if (pair == NULL) + return (0); + + ddura.ddura_holds = holds; + ddura.ddura_errlist = errlist; + ddura.ddura_todelete = fnvlist_alloc(); + + error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check, + dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds)); + fnvlist_free(ddura.ddura_todelete); + return (error); +} + +typedef struct dsl_dataset_user_release_tmp_arg { + uint64_t ddurta_dsobj; + nvlist_t *ddurta_holds; + boolean_t ddurta_deleteme; +} dsl_dataset_user_release_tmp_arg_t; + +static int +dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_user_release_tmp_arg_t *ddurta = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + int error; + + if (!dmu_tx_is_syncing(tx)) + return (0); + + error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds); + if (error) + return (error); + + error = dsl_dataset_user_release_check_one(ds, + ddurta->ddurta_holds, &ddurta->ddurta_deleteme); + dsl_dataset_rele(ds, FTAG); + return (error); +} + +static void +dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_user_release_tmp_arg_t *ddurta = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds; + + VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds)); + dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx); + if (ddurta->ddurta_deleteme) { + ASSERT(ds->ds_userrefs == 0 && + ds->ds_phys->ds_num_children == 1 && + DS_IS_DEFER_DESTROY(ds)); + dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx); + } + dsl_dataset_rele(ds, FTAG); +} + +/* + * Called at spa_load time to release a stale temporary user hold. + * Also called by the onexit code. + */ +void +dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag) +{ + dsl_dataset_user_release_tmp_arg_t ddurta; + +#ifdef _KERNEL + dsl_dataset_t *ds; + int error; + + /* Make sure it is not mounted. 
*/ + dsl_pool_config_enter(dp, FTAG); + error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); + if (error == 0) { + char name[MAXNAMELEN]; + dsl_dataset_name(ds, name); + dsl_dataset_rele(ds, FTAG); + dsl_pool_config_exit(dp, FTAG); + zfs_unmount_snap(name); + } else { + dsl_pool_config_exit(dp, FTAG); + } +#endif + + ddurta.ddurta_dsobj = dsobj; + ddurta.ddurta_holds = fnvlist_alloc(); + fnvlist_add_boolean(ddurta.ddurta_holds, htag); + + (void) dsl_sync_task(spa_name(dp->dp_spa), + dsl_dataset_user_release_tmp_check, + dsl_dataset_user_release_tmp_sync, &ddurta, 1); + fnvlist_free(ddurta.ddurta_holds); +} + +typedef struct zfs_hold_cleanup_arg { + char zhca_spaname[MAXNAMELEN]; + uint64_t zhca_spa_load_guid; + uint64_t zhca_dsobj; + char zhca_htag[MAXNAMELEN]; +} zfs_hold_cleanup_arg_t; + +static void +dsl_dataset_user_release_onexit(void *arg) +{ + zfs_hold_cleanup_arg_t *ca = arg; + spa_t *spa; + int error; + + error = spa_open(ca->zhca_spaname, &spa, FTAG); + if (error != 0) { + zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s " + "because pool is no longer loaded", + ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag); + return; + } + if (spa_load_guid(spa) != ca->zhca_spa_load_guid) { + zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s " + "because pool is no longer loaded (guid doesn't match)", + ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag); + spa_close(spa, FTAG); + return; + } + + dsl_dataset_user_release_tmp(spa_get_dsl(spa), + ca->zhca_dsobj, ca->zhca_htag); + kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); + spa_close(spa, FTAG); +} + +void +dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, + minor_t minor) +{ + zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP); + spa_t *spa = dsl_dataset_get_spa(ds); + (void) strlcpy(ca->zhca_spaname, spa_name(spa), + sizeof (ca->zhca_spaname)); + ca->zhca_spa_load_guid = spa_load_guid(spa); + ca->zhca_dsobj = ds->ds_object; + (void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag)); + VERIFY0(zfs_onexit_add_cb(minor, + dsl_dataset_user_release_onexit, ca, NULL)); +} + +int +dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl) +{ + dsl_pool_t *dp; + dsl_dataset_t *ds; + int err; + + err = dsl_pool_hold(dsname, FTAG, &dp); + if (err != 0) + return (err); + err = dsl_dataset_hold(dp, dsname, FTAG, &ds); + if (err != 0) { + dsl_pool_rele(dp, FTAG); + return (err); + } + + if (ds->ds_phys->ds_userrefs_obj != 0) { + zap_attribute_t *za; + zap_cursor_t zc; + + za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, + ds->ds_phys->ds_userrefs_obj); + zap_cursor_retrieve(&zc, za) == 0; + zap_cursor_advance(&zc)) { + fnvlist_add_uint64(nvl, za->za_name, + za->za_first_integer); + } + zap_cursor_fini(&zc); + kmem_free(za, sizeof (zap_attribute_t)); + } + dsl_dataset_rele(ds, FTAG); + dsl_pool_rele(dp, FTAG); + return (0); +} diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index cd1b6ce730f4..f9cb8cead815 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -1928,6 +1928,46 @@ void metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp) spa_config_exit(spa, SCL_VDEV, FTAG); } +static void +checkmap(space_map_t *sm, uint64_t off, uint64_t size) +{ + space_seg_t *ss; + avl_index_t where; + + mutex_enter(sm->sm_lock); + ss = space_map_find(sm, off, size, &where); + if (ss != NULL) + panic("freeing free block; ss=%p", (void *)ss); + mutex_exit(sm->sm_lock); +} + +void +metaslab_check_free(spa_t *spa, const 
blkptr_t *bp) +{ + int i, j; + + if ((zfs_flags & ZFS_DEBUG_ZIO_FREE) == 0) + return; + + spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); + for (i = 0; i < BP_GET_NDVAS(bp); i++) { + uint64_t vdid = DVA_GET_VDEV(&bp->blk_dva[i]); + vdev_t *vd = vdev_lookup_top(spa, vdid); + uint64_t off = DVA_GET_OFFSET(&bp->blk_dva[i]); + uint64_t size = DVA_GET_ASIZE(&bp->blk_dva[i]); + metaslab_t *ms = vd->vdev_ms[off >> vd->vdev_ms_shift]; + + if (ms->ms_map->sm_loaded) + checkmap(ms->ms_map, off, size); + + for (j = 0; j < TXG_SIZE; j++) + checkmap(ms->ms_freemap[j], off, size); + for (j = 0; j < TXG_DEFER_SIZE; j++) + checkmap(ms->ms_defermap[j], off, size); + } + spa_config_exit(spa, SCL_VDEV, FTAG); +} + #if defined(_KERNEL) && defined(HAVE_SPL) module_param(metaslab_debug, int, 0644); MODULE_PARM_DESC(metaslab_debug, "keep space maps in core to verify frees"); diff --git a/module/zfs/refcount.c b/module/zfs/refcount.c index e43807c8e3f4..49980efcc85d 100644 --- a/module/zfs/refcount.c +++ b/module/zfs/refcount.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -32,7 +33,7 @@ int reference_tracking_enable = FALSE; /* runs out of memory too easily */ #else int reference_tracking_enable = TRUE; #endif -int reference_history = 4; /* tunable */ +int reference_history = 3; /* tunable */ static kmem_cache_t *reference_cache; static kmem_cache_t *reference_history_cache; @@ -64,6 +65,14 @@ refcount_create(refcount_t *rc) offsetof(reference_t, ref_link)); rc->rc_count = 0; rc->rc_removed_count = 0; + rc->rc_tracked = reference_tracking_enable; +} + +void +refcount_create_untracked(refcount_t *rc) +{ + refcount_create(rc); + rc->rc_tracked = B_FALSE; } void @@ -96,14 +105,12 @@ refcount_destroy(refcount_t *rc) int refcount_is_zero(refcount_t *rc) { - ASSERT(rc->rc_count >= 0); return (rc->rc_count == 0); } int64_t refcount_count(refcount_t *rc) { - ASSERT(rc->rc_count >= 0); return (rc->rc_count); } @@ -113,14 +120,14 @@ refcount_add_many(refcount_t *rc, uint64_t number, void *holder) reference_t *ref = NULL; int64_t count; - if (reference_tracking_enable) { + if (rc->rc_tracked) { ref = kmem_cache_alloc(reference_cache, KM_PUSHPAGE); ref->ref_holder = holder; ref->ref_number = number; } mutex_enter(&rc->rc_mtx); ASSERT(rc->rc_count >= 0); - if (reference_tracking_enable) + if (rc->rc_tracked) list_insert_head(&rc->rc_list, ref); rc->rc_count += number; count = rc->rc_count; @@ -144,7 +151,7 @@ refcount_remove_many(refcount_t *rc, uint64_t number, void *holder) mutex_enter(&rc->rc_mtx); ASSERT(rc->rc_count >= number); - if (!reference_tracking_enable) { + if (!rc->rc_tracked) { rc->rc_count -= number; count = rc->rc_count; mutex_exit(&rc->rc_mtx); @@ -161,7 +168,7 @@ refcount_remove_many(refcount_t *rc, uint64_t number, void *holder) KM_PUSHPAGE); list_insert_head(&rc->rc_removed, ref); rc->rc_removed_count++; - if (rc->rc_removed_count >= reference_history) { + if (rc->rc_removed_count > reference_history) { ref = list_tail(&rc->rc_removed); list_remove(&rc->rc_removed, ref); kmem_cache_free(reference_history_cache, diff --git a/module/zfs/rrwlock.c b/module/zfs/rrwlock.c index 7f9290bd44c1..8e80166c7d14 100644 --- a/module/zfs/rrwlock.c +++ b/module/zfs/rrwlock.c @@ -75,8 +75,9 @@ uint_t rrw_tsd_key; typedef struct rrw_node { - struct rrw_node *rn_next; - rrwlock_t *rn_rrl; + struct rrw_node *rn_next; + rrwlock_t *rn_rrl; + void *rn_tag; } rrw_node_t; static rrw_node_t * @@ -98,13 
+99,14 @@ rrn_find(rrwlock_t *rrl) * Add a node to the head of the singly linked list. */ static void -rrn_add(rrwlock_t *rrl) +rrn_add(rrwlock_t *rrl, void *tag) { rrw_node_t *rn; rn = kmem_alloc(sizeof (*rn), KM_SLEEP); rn->rn_rrl = rrl; rn->rn_next = tsd_get(rrw_tsd_key); + rn->rn_tag = tag; VERIFY(tsd_set(rrw_tsd_key, rn) == 0); } @@ -113,7 +115,7 @@ rrn_add(rrwlock_t *rrl) * thread's list and return TRUE; otherwise return FALSE. */ static boolean_t -rrn_find_and_remove(rrwlock_t *rrl) +rrn_find_and_remove(rrwlock_t *rrl, void *tag) { rrw_node_t *rn; rrw_node_t *prev = NULL; @@ -122,7 +124,7 @@ rrn_find_and_remove(rrwlock_t *rrl) return (B_FALSE); for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) { - if (rn->rn_rrl == rrl) { + if (rn->rn_rrl == rrl && rn->rn_tag == tag) { if (prev) prev->rn_next = rn->rn_next; else @@ -136,7 +138,7 @@ rrn_find_and_remove(rrwlock_t *rrl) } void -rrw_init(rrwlock_t *rrl) +rrw_init(rrwlock_t *rrl, boolean_t track_all) { mutex_init(&rrl->rr_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&rrl->rr_cv, NULL, CV_DEFAULT, NULL); @@ -144,6 +146,7 @@ rrw_init(rrwlock_t *rrl) refcount_create(&rrl->rr_anon_rcount); refcount_create(&rrl->rr_linked_rcount); rrl->rr_writer_wanted = B_FALSE; + rrl->rr_track_all = track_all; } void @@ -156,12 +159,13 @@ rrw_destroy(rrwlock_t *rrl) refcount_destroy(&rrl->rr_linked_rcount); } -static void +void rrw_enter_read(rrwlock_t *rrl, void *tag) { mutex_enter(&rrl->rr_lock); #if !defined(DEBUG) && defined(_KERNEL) - if (!rrl->rr_writer && !rrl->rr_writer_wanted) { + if (rrl->rr_writer == NULL && !rrl->rr_writer_wanted && + !rrl->rr_track_all) { rrl->rr_anon_rcount.rc_count++; mutex_exit(&rrl->rr_lock); return; @@ -171,14 +175,14 @@ rrw_enter_read(rrwlock_t *rrl, void *tag) ASSERT(rrl->rr_writer != curthread); ASSERT(refcount_count(&rrl->rr_anon_rcount) >= 0); - while (rrl->rr_writer || (rrl->rr_writer_wanted && + while (rrl->rr_writer != NULL || (rrl->rr_writer_wanted && refcount_is_zero(&rrl->rr_anon_rcount) && rrn_find(rrl) == NULL)) cv_wait(&rrl->rr_cv, &rrl->rr_lock); - if (rrl->rr_writer_wanted) { + if (rrl->rr_writer_wanted || rrl->rr_track_all) { /* may or may not be a re-entrant enter */ - rrn_add(rrl); + rrn_add(rrl, tag); (void) refcount_add(&rrl->rr_linked_rcount, tag); } else { (void) refcount_add(&rrl->rr_anon_rcount, tag); @@ -187,7 +191,7 @@ rrw_enter_read(rrwlock_t *rrl, void *tag) mutex_exit(&rrl->rr_lock); } -static void +void rrw_enter_write(rrwlock_t *rrl) { mutex_enter(&rrl->rr_lock); @@ -233,10 +237,12 @@ rrw_exit(rrwlock_t *rrl, void *tag) if (rrl->rr_writer == NULL) { int64_t count; - if (rrn_find_and_remove(rrl)) + if (rrn_find_and_remove(rrl, tag)) { count = refcount_remove(&rrl->rr_linked_rcount, tag); - else + } else { + ASSERT(!rrl->rr_track_all); count = refcount_remove(&rrl->rr_anon_rcount, tag); + } if (count == 0) cv_broadcast(&rrl->rr_cv); } else { @@ -249,6 +255,11 @@ rrw_exit(rrwlock_t *rrl, void *tag) mutex_exit(&rrl->rr_lock); } +/* + * If the lock was created with track_all, rrw_held(RW_READER) will return + * B_TRUE iff the current thread has the lock for reader. Otherwise it may + * return B_TRUE if any thread has the lock for reader. 
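/*
 * Editorial sketch (not part of the patch): assumed usage of a tracked
 * rrwlock, as the pool config lock becomes after this change, so that
 * rrw_held(..., RW_READER) answers for the calling thread only.
 */
static void
example_tracked_rrwlock(void)
{
	rrwlock_t lock;

	rrw_init(&lock, B_TRUE);		/* track_all = B_TRUE */
	rrw_enter_read(&lock, FTAG);
	ASSERT(rrw_held(&lock, RW_READER));	/* true for this thread */
	rrw_exit(&lock, FTAG);
	rrw_destroy(&lock);
}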
+ */ boolean_t rrw_held(rrwlock_t *rrl, krw_t rw) { @@ -259,7 +270,7 @@ rrw_held(rrwlock_t *rrl, krw_t rw) held = (rrl->rr_writer == curthread); } else { held = (!refcount_is_zero(&rrl->rr_anon_rcount) || - !refcount_is_zero(&rrl->rr_linked_rcount)); + rrn_find(rrl) != NULL); } mutex_exit(&rrl->rr_lock); diff --git a/module/zfs/sa.c b/module/zfs/sa.c index 581cf4b0daf3..bad6123aa028 100644 --- a/module/zfs/sa.c +++ b/module/zfs/sa.c @@ -1019,10 +1019,10 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, sa_attr_type_t *tb; int error; - mutex_enter(&os->os_lock); + mutex_enter(&os->os_user_ptr_lock); if (os->os_sa) { mutex_enter(&os->os_sa->sa_lock); - mutex_exit(&os->os_lock); + mutex_exit(&os->os_user_ptr_lock); tb = os->os_sa->sa_user_table; mutex_exit(&os->os_sa->sa_lock); *user_table = tb; @@ -1035,7 +1035,7 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, os->os_sa = sa; mutex_enter(&sa->sa_lock); - mutex_exit(&os->os_lock); + mutex_exit(&os->os_user_ptr_lock); avl_create(&sa->sa_layout_num_tree, layout_num_compare, sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node)); avl_create(&sa->sa_layout_hash_tree, layout_hash_compare, diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 7c37ca426d4a..fcb1711a211b 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #ifdef _KERNEL @@ -131,10 +132,8 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */ }; -static dsl_syncfunc_t spa_sync_version; -static dsl_syncfunc_t spa_sync_props; -static dsl_checkfunc_t spa_change_guid_check; -static dsl_syncfunc_t spa_change_guid_sync; +static void spa_sync_version(void *arg, dmu_tx_t *tx); +static void spa_sync_props(void *arg, dmu_tx_t *tx); static boolean_t spa_has_active_shared_spare(spa_t *spa); static inline int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config, spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, @@ -329,10 +328,10 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp) dsl_dataset_t *ds = NULL; dp = spa_get_dsl(spa); - rw_enter(&dp->dp_config_rwlock, RW_READER); + dsl_pool_config_enter(dp, FTAG); if ((err = dsl_dataset_hold_obj(dp, za.za_first_integer, FTAG, &ds))) { - rw_exit(&dp->dp_config_rwlock); + dsl_pool_config_exit(dp, FTAG); break; } @@ -341,7 +340,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp) KM_PUSHPAGE); dsl_dataset_name(ds, strval); dsl_dataset_rele(ds, FTAG); - rw_exit(&dp->dp_config_rwlock); + dsl_pool_config_exit(dp, FTAG); } else { strval = NULL; intval = za.za_first_integer; @@ -495,9 +494,10 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) if (dmu_objset_type(os) != DMU_OST_ZFS) { error = ENOTSUP; - } else if ((error = dsl_prop_get_integer(strval, + } else if ((error = + dsl_prop_get_int_ds(dmu_objset_ds(os), zfs_prop_to_name(ZFS_PROP_COMPRESSION), - &compress, NULL)) == 0 && + &compress)) == 0 && !BOOTFS_COMPRESS_VALID(compress)) { error = ENOTSUP; } else { @@ -661,8 +661,8 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp) * read object, the features for write object, or the * feature descriptions object. 
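/*
 * Editorial sketch (assumed helper, not from the patch): the open-context
 * pattern spa_prop_get() is converted to above -- take the pool config lock
 * through the dsl_pool interface rather than touching dp_config_rwlock
 * directly.
 */
static int
example_get_ds_name(dsl_pool_t *dp, uint64_t dsobj, char *buf)
{
	dsl_dataset_t *ds;
	int err;

	dsl_pool_config_enter(dp, FTAG);
	err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
	if (err == 0) {
		dsl_dataset_name(ds, buf);
		dsl_dataset_rele(ds, FTAG);
	}
	dsl_pool_config_exit(dp, FTAG);
	return (err);
}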
*/ - error = dsl_sync_task_do(spa_get_dsl(spa), NULL, - spa_sync_version, spa, &ver, 6); + error = dsl_sync_task(spa->spa_name, NULL, + spa_sync_version, &ver, 6); if (error) return (error); continue; @@ -673,8 +673,8 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp) } if (need_sync) { - return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, - spa, nvp, 6)); + return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props, + nvp, 6)); } return (0); @@ -696,12 +696,12 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) /*ARGSUSED*/ static int -spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx) +spa_change_guid_check(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; vdev_t *rvd = spa->spa_root_vdev; uint64_t vdev_state; - ASSERTV(uint64_t *newguid = arg2); + ASSERTV(uint64_t *newguid = arg); spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); vdev_state = rvd->vdev_state; @@ -716,10 +716,10 @@ spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx) } static void -spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx) +spa_change_guid_sync(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - uint64_t *newguid = arg2; + uint64_t *newguid = arg; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; uint64_t oldguid; vdev_t *rvd = spa->spa_root_vdev; @@ -753,8 +753,8 @@ spa_change_guid(spa_t *spa) mutex_enter(&spa_namespace_lock); guid = spa_generate_guid(NULL); - error = dsl_sync_task_do(spa_get_dsl(spa), spa_change_guid_check, - spa_change_guid_sync, spa, &guid, 5); + error = dsl_sync_task(spa->spa_name, spa_change_guid_check, + spa_change_guid_sync, &guid, 5); if (error == 0) { spa_config_sync(spa, B_FALSE, B_TRUE); @@ -1729,23 +1729,24 @@ spa_config_valid(spa_t *spa, nvlist_t *config) /* * Check for missing log devices */ -static int +static boolean_t spa_check_logs(spa_t *spa) { + boolean_t rv = B_FALSE; + switch (spa->spa_log_state) { default: break; case SPA_LOG_MISSING: /* need to recheck in case slog has been restored */ case SPA_LOG_UNKNOWN: - if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, - DS_FIND_CHILDREN)) { + rv = (dmu_objset_find(spa->spa_name, zil_check_log_chain, + NULL, DS_FIND_CHILDREN) != 0); + if (rv) spa_set_log_state(spa, SPA_LOG_MISSING); - return (1); - } break; } - return (0); + return (rv); } static boolean_t @@ -1793,11 +1794,11 @@ spa_activate_log(spa_t *spa) int spa_offline_log(spa_t *spa) { - int error = 0; - - if ((error = dmu_objset_find(spa_name(spa), zil_vdev_offline, - NULL, DS_FIND_CHILDREN)) == 0) { + int error; + error = dmu_objset_find(spa_name(spa), zil_vdev_offline, + NULL, DS_FIND_CHILDREN); + if (error == 0) { /* * We successfully offlined the log device, sync out the * current txg so that the "stubby" block can be removed @@ -3610,7 +3611,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, if (props != NULL) { spa_configfile_set(spa, props, B_FALSE); - spa_sync_props(spa, props, tx); + spa_sync_props(props, tx); } dmu_tx_commit(tx); @@ -3844,7 +3845,7 @@ spa_import_rootpool(char *devpath, char *devid) * Import a non-root pool into the system. 
*/ int -spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) +spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) { spa_t *spa; char *altroot = NULL; @@ -5878,10 +5879,11 @@ spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) } static void -spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx) +spa_sync_version(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - uint64_t version = *(uint64_t *)arg2; + uint64_t *versionp = arg; + uint64_t version = *versionp; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; /* * Setting the version is special cased when first creating the pool. @@ -5900,11 +5902,11 @@ spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx) * Set zpool properties. */ static void -spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) +spa_sync_props(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; + nvlist_t *nvp = arg; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; objset_t *mos = spa->spa_meta_objset; - nvlist_t *nvp = arg2; nvpair_t *elem = NULL; mutex_enter(&spa->spa_props_lock); @@ -6056,6 +6058,8 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx) ASSERT(spa->spa_sync_pass == 1); + rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); + if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { dsl_pool_create_origin(dp, tx); @@ -6081,6 +6085,7 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx) spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) { spa_feature_create_zap_objects(spa, tx); } + rrw_exit(&dp->dp_config_rwlock, FTAG); } /* diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c index 79d48620c9e1..bbcd697e00a8 100644 --- a/module/zfs/spa_history.c +++ b/module/zfs/spa_history.c @@ -197,10 +197,10 @@ spa_history_zone(void) */ /*ARGSUSED*/ static void -spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) +spa_history_log_sync(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - nvlist_t *nvl = arg2; + nvlist_t *nvl = arg; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; objset_t *mos = spa->spa_meta_objset; dmu_buf_t *dbp; spa_history_phys_t *shpp; @@ -222,7 +222,7 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) * Get the offset of where we need to write via the bonus buffer. * Update the offset when the write completes. */ - VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); + VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); shpp = dbp->db_data; dmu_buf_will_dirty(dbp, tx); @@ -326,8 +326,8 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED())); /* Kick this off asynchronously; errors are ignored. 
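/*
 * Editorial note (illustrative, not part of the patch): this is the
 * fire-and-forget variant of the new synctask interface. From inside an
 * already-assigned tx, it queues a syncfunc with no check and no waiter;
 * the synctask frees itself after it runs:
 *
 *	dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync,
 *	    nvarg, 0, tx);
 */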
*/ - dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, - spa_history_log_sync, spa, nvarg, 0, tx); + dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync, + nvarg, 0, tx); dmu_tx_commit(tx); /* spa_history_log_sync will free nvl */ @@ -465,10 +465,10 @@ log_internal(nvlist_t *nvl, const char *operation, spa_t *spa, fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg); if (dmu_tx_is_syncing(tx)) { - spa_history_log_sync(spa, nvl, tx); + spa_history_log_sync(nvl, tx); } else { - dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, - spa_history_log_sync, spa, nvl, 0, tx); + dsl_sync_task_nowait(spa_get_dsl(spa), + spa_history_log_sync, nvl, 0, tx); } /* spa_history_log_sync() will free nvl */ } @@ -544,17 +544,11 @@ spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, void spa_history_log_version(spa_t *spa, const char *operation) { -#ifdef _KERNEL - uint64_t current_vers = spa_version(spa); - spa_history_log_internal(spa, operation, NULL, "pool version %llu; software version %llu/%d; uts %s %s %s %s", - (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, + (u_longlong_t)spa_version(spa), SPA_VERSION, ZPL_VERSION, utsname.nodename, utsname.release, utsname.version, utsname.machine); - cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", operation, - (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION); -#endif } #if defined(_KERNEL) && defined(HAVE_SPL) diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 0ca9f3a7a622..a5e13b5fb3cf 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -268,7 +268,7 @@ spa_config_lock_init(spa_t *spa) spa_config_lock_t *scl = &spa->spa_config_lock[i]; mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL); - refcount_create(&scl->scl_count); + refcount_create_untracked(&scl->scl_count); scl->scl_writer = NULL; scl->scl_write_wanted = 0; } @@ -326,6 +326,8 @@ spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw) int wlocks_held = 0; int i; + ASSERT3U(SCL_LOCKS, <, sizeof (wlocks_held) * NBBY); + for (i = 0; i < SCL_LOCKS; i++) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; if (scl->scl_writer == curthread) @@ -406,27 +408,22 @@ spa_lookup(const char *name) static spa_t search; /* spa_t is large; don't allocate on stack */ spa_t *spa; avl_index_t where; - char c = 0; char *cp; ASSERT(MUTEX_HELD(&spa_namespace_lock)); + (void) strlcpy(search.spa_name, name, sizeof (search.spa_name)); + /* * If it's a full dataset name, figure out the pool name and * just use that. 
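/*
 * Editorial note (illustrative, not part of the patch): spa_lookup() now
 * truncates a private copy in search.spa_name instead of temporarily
 * writing a '\0' into the caller's buffer, so a lookup such as
 * spa_lookup("tank/home@yesterday") resolves to the "tank" spa_t without
 * modifying the input string.
 */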
*/ - cp = strpbrk(name, "/@"); - if (cp) { - c = *cp; + cp = strpbrk(search.spa_name, "/@"); + if (cp != NULL) *cp = '\0'; - } - (void) strlcpy(search.spa_name, name, sizeof (search.spa_name)); spa = avl_find(&spa_namespace_avl, &search, &where); - if (cp) - *cp = c; - return (spa); } @@ -539,6 +536,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) KM_SLEEP) == 0); } + spa->spa_debug = ((zfs_flags & ZFS_DEBUG_SPA) != 0); + return (spa); } diff --git a/module/zfs/space_map.c b/module/zfs/space_map.c index a031f3a20e6a..2cf1d2a18407 100644 --- a/module/zfs/space_map.c +++ b/module/zfs/space_map.c @@ -102,7 +102,7 @@ void space_map_add(space_map_t *sm, uint64_t start, uint64_t size) { avl_index_t where; - space_seg_t ssearch, *ss_before, *ss_after, *ss; + space_seg_t *ss_before, *ss_after, *ss; uint64_t end = start + size; int merge_before, merge_after; @@ -115,11 +115,8 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size) VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); - ssearch.ss_start = start; - ssearch.ss_end = end; - ss = avl_find(&sm->sm_root, &ssearch, &where); - - if (ss != NULL && ss->ss_start <= start && ss->ss_end >= end) { + ss = space_map_find(sm, start, size, &where); + if (ss != NULL) { zfs_panic_recover("zfs: allocating allocated segment" "(offset=%llu size=%llu)\n", (longlong_t)start, (longlong_t)size); @@ -171,19 +168,12 @@ void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size) { avl_index_t where; - space_seg_t ssearch, *ss, *newseg; + space_seg_t *ss, *newseg; uint64_t end = start + size; int left_over, right_over; - ASSERT(MUTEX_HELD(sm->sm_lock)); VERIFY(!sm->sm_condensing); - VERIFY(size != 0); - VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); - VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); - - ssearch.ss_start = start; - ssearch.ss_end = end; - ss = avl_find(&sm->sm_root, &ssearch, &where); + ss = space_map_find(sm, start, size, &where); /* Make sure we completely overlap with someone */ if (ss == NULL) { @@ -226,12 +216,11 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size) sm->sm_space -= size; } -boolean_t -space_map_contains(space_map_t *sm, uint64_t start, uint64_t size) +space_seg_t * +space_map_find(space_map_t *sm, uint64_t start, uint64_t size, + avl_index_t *wherep) { - avl_index_t where; space_seg_t ssearch, *ss; - uint64_t end = start + size; ASSERT(MUTEX_HELD(sm->sm_lock)); VERIFY(size != 0); @@ -239,10 +228,20 @@ space_map_contains(space_map_t *sm, uint64_t start, uint64_t size) VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); ssearch.ss_start = start; - ssearch.ss_end = end; - ss = avl_find(&sm->sm_root, &ssearch, &where); + ssearch.ss_end = start + size; + ss = avl_find(&sm->sm_root, &ssearch, wherep); + + if (ss != NULL && ss->ss_start <= start && ss->ss_end >= start + size) + return (ss); + return (NULL); +} + +boolean_t +space_map_contains(space_map_t *sm, uint64_t start, uint64_t size) +{ + avl_index_t where; - return (ss != NULL && ss->ss_start <= start && ss->ss_end >= end); + return (space_map_find(sm, start, size, &where) != 0); } void diff --git a/module/zfs/txg.c b/module/zfs/txg.c index 7c820af4f8b3..b3e537f459c4 100644 --- a/module/zfs/txg.c +++ b/module/zfs/txg.c @@ -659,6 +659,8 @@ txg_wait_synced(dsl_pool_t *dp, uint64_t txg) { tx_state_t *tx = &dp->dp_tx; + ASSERT(!dsl_pool_config_held(dp)); + mutex_enter(&tx->tx_sync_lock); ASSERT(tx->tx_threads == 2); if (txg == 0) @@ -682,6 +684,8 @@ txg_wait_open(dsl_pool_t 
*dp, uint64_t txg) { tx_state_t *tx = &dp->dp_tx; + ASSERT(!dsl_pool_config_held(dp)); + mutex_enter(&tx->tx_sync_lock); ASSERT(tx->tx_threads == 2); if (txg == 0) @@ -747,42 +751,43 @@ txg_list_empty(txg_list_t *tl, uint64_t txg) } /* - * Add an entry to the list. - * Returns 0 if it's a new entry, 1 if it's already there. + * Add an entry to the list (unless it's already on the list). + * Returns B_TRUE if it was actually added. */ -int +boolean_t txg_list_add(txg_list_t *tl, void *p, uint64_t txg) { int t = txg & TXG_MASK; txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); - int already_on_list; + boolean_t add; mutex_enter(&tl->tl_lock); - already_on_list = tn->tn_member[t]; - if (!already_on_list) { + add = (tn->tn_member[t] == 0); + if (add) { tn->tn_member[t] = 1; tn->tn_next[t] = tl->tl_head[t]; tl->tl_head[t] = tn; } mutex_exit(&tl->tl_lock); - return (already_on_list); + return (add); } /* - * Add an entry to the end of the list (walks list to find end). - * Returns 0 if it's a new entry, 1 if it's already there. + * Add an entry to the end of the list, unless it's already on the list. + * (walks list to find end) + * Returns B_TRUE if it was actually added. */ -int +boolean_t txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg) { int t = txg & TXG_MASK; txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); - int already_on_list; + boolean_t add; mutex_enter(&tl->tl_lock); - already_on_list = tn->tn_member[t]; - if (!already_on_list) { + add = (tn->tn_member[t] == 0); + if (add) { txg_node_t **tp; for (tp = &tl->tl_head[t]; *tp != NULL; tp = &(*tp)->tn_next[t]) @@ -794,7 +799,7 @@ txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg) } mutex_exit(&tl->tl_lock); - return (already_on_list); + return (add); } /* @@ -845,13 +850,13 @@ txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg) return (NULL); } -int +boolean_t txg_list_member(txg_list_t *tl, void *p, uint64_t txg) { int t = txg & TXG_MASK; txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); - return (tn->tn_member[t]); + return (tn->tn_member[t] != 0); } /* diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index a03e1c6948ea..3cf0089ecef0 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -488,13 +489,13 @@ zfsctl_rename_snap(zfs_sb_t *zsb, zfs_snapentry_t *sep, const char *name) */ /*ARGSUSED*/ int -zfsctl_snapdir_rename(struct inode *sdip, char *sname, - struct inode *tdip, char *tname, cred_t *cr, int flags) +zfsctl_snapdir_rename(struct inode *sdip, char *snm, + struct inode *tdip, char *tnm, cred_t *cr, int flags) { zfs_sb_t *zsb = ITOZSB(sdip); zfs_snapentry_t search, *sep; avl_index_t where; - char *to, *from, *real; + char *to, *from, *real, *fsname; int error; ZFS_ENTER(zsb); @@ -502,23 +503,26 @@ zfsctl_snapdir_rename(struct inode *sdip, char *sname, to = kmem_alloc(MAXNAMELEN, KM_SLEEP); from = kmem_alloc(MAXNAMELEN, KM_SLEEP); real = kmem_alloc(MAXNAMELEN, KM_SLEEP); + fsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); if (zsb->z_case == ZFS_CASE_INSENSITIVE) { - error = dmu_snapshot_realname(zsb->z_os, sname, real, + error = dmu_snapshot_realname(zsb->z_os, snm, real, MAXNAMELEN, NULL); if (error == 0) { - sname = real; + snm = real; } else if (error != ENOTSUP) { goto out; } } - error = zfsctl_snapshot_zname(sdip, sname, MAXNAMELEN, from); - if (!error) - error = zfsctl_snapshot_zname(tdip, tname, MAXNAMELEN, to); - if (!error) + dmu_objset_name(zsb->z_os, fsname); + + 
error = zfsctl_snapshot_zname(sdip, snm, MAXNAMELEN, from); + if (error == 0) + error = zfsctl_snapshot_zname(tdip, tnm, MAXNAMELEN, to); + if (error == 0) error = zfs_secpolicy_rename_perms(from, to, cr); - if (error) + if (error != 0) goto out; /* @@ -532,21 +536,21 @@ zfsctl_snapdir_rename(struct inode *sdip, char *sname, /* * No-op when names are identical. */ - if (strcmp(sname, tname) == 0) { + if (strcmp(snm, tnm) == 0) { error = 0; goto out; } mutex_enter(&zsb->z_ctldir_lock); - error = dmu_objset_rename(from, to, B_FALSE); + error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE); if (error) goto out_unlock; - search.se_name = (char *)sname; + search.se_name = (char *)snm; sep = avl_find(&zsb->z_ctldir_snaps, &search, &where); if (sep) - zfsctl_rename_snap(zsb, sep, tname); + zfsctl_rename_snap(zsb, sep, tnm); out_unlock: mutex_exit(&zsb->z_ctldir_lock); @@ -554,6 +558,7 @@ zfsctl_snapdir_rename(struct inode *sdip, char *sname, kmem_free(from, MAXNAMELEN); kmem_free(to, MAXNAMELEN); kmem_free(real, MAXNAMELEN); + kmem_free(fsname, MAXNAMELEN); ZFS_EXIT(zsb); @@ -588,14 +593,14 @@ zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags) } error = zfsctl_snapshot_zname(dip, name, MAXNAMELEN, snapname); - if (!error) + if (error == 0) error = zfs_secpolicy_destroy_perms(snapname, cr); - if (error) + if (error != 0) goto out; error = zfsctl_unmount_snapshot(zsb, name, MNT_FORCE); if ((error == 0) || (error == ENOENT)) - error = dmu_objset_destroy(snapname, B_FALSE); + error = dsl_destroy_snapshot(snapname, B_FALSE); out: kmem_free(snapname, MAXNAMELEN); kmem_free(real, MAXNAMELEN); @@ -628,12 +633,12 @@ zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, dmu_objset_name(zsb->z_os, dsname); error = zfs_secpolicy_snapshot_perms(dsname, cr); - if (error) + if (error != 0) goto out; if (error == 0) { error = dmu_objset_snapshot_one(dsname, dirname); - if (error) + if (error != 0) goto out; error = zfsctl_snapdir_lookup(dip, dirname, ipp, diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index e64d6a1f04eb..acc54e5a7830 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -159,6 +159,7 @@ #include #include #include +#include #include #include #include @@ -175,9 +176,11 @@ #include #include #include -#include #include +#include +#include +#include #include #include @@ -242,11 +245,7 @@ static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *, int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *); static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp); -static int zfs_prop_activate_feature(dsl_pool_t *dp, zfeature_info_t *feature); -static int zfs_prop_activate_feature_check(void *arg1, void *arg2, - dmu_tx_t *tx); -static void zfs_prop_activate_feature_sync(void *arg1, void *arg2, - dmu_tx_t *tx); +static int zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature); static void history_str_free(char *buf) @@ -430,49 +429,48 @@ zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr) { uint64_t zoned; - rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); - if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) { - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); + if (dsl_prop_get_int_ds(ds, "zoned", &zoned)) return (ENOENT); - } - rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); return (zfs_dozonecheck_impl(dataset, zoned, cr)); } static int -zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) 
+zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, + const char *perm, cred_t *cr) { int error; - dsl_dataset_t *ds; - - error = dsl_dataset_hold(name, FTAG, &ds); - if (error != 0) - return (error); error = zfs_dozonecheck_ds(name, ds, cr); if (error == 0) { error = secpolicy_zfs(cr); - if (error) + if (error != 0) error = dsl_deleg_access_impl(ds, perm, cr); } - - dsl_dataset_rele(ds, FTAG); return (error); } static int -zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, - const char *perm, cred_t *cr) +zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) { int error; + dsl_dataset_t *ds; + dsl_pool_t *dp; - error = zfs_dozonecheck_ds(name, ds, cr); - if (error == 0) { - error = secpolicy_zfs(cr); - if (error) - error = dsl_deleg_access_impl(ds, perm, cr); + error = dsl_pool_hold(name, FTAG, &dp); + if (error != 0) + return (error); + + error = dsl_dataset_hold(dp, name, FTAG, &ds); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + return (error); } + + error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr); + + dsl_dataset_rele(ds, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } @@ -495,7 +493,7 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) /* First get the existing dataset label. */ error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 1, sizeof (ds_hexsl), &ds_hexsl, NULL); - if (error) + if (error != 0) return (EPERM); if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0) @@ -545,7 +543,7 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) */ error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, setsl_tag, &os); - if (error) + if (error != 0) return (EPERM); dmu_objset_disown(os, setsl_tag); @@ -638,7 +636,7 @@ zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) int error; error = zfs_dozonecheck(zc->zc_name, cr); - if (error) + if (error != 0) return (error); /* @@ -660,7 +658,6 @@ zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) static int zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - spa_t *spa; dsl_pool_t *dp; dsl_dataset_t *ds; char *cp; @@ -673,23 +670,22 @@ zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) cp = strchr(zc->zc_name, '@'); if (cp == NULL) return (EINVAL); - error = spa_open(zc->zc_name, &spa, FTAG); - if (error) + error = dsl_pool_hold(zc->zc_name, FTAG, &dp); + if (error != 0) return (error); - dp = spa_get_dsl(spa); - rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); - rw_exit(&dp->dp_config_rwlock); - spa_close(spa, FTAG); - if (error) + if (error != 0) { + dsl_pool_rele(dp, FTAG); return (error); + } dsl_dataset_name(ds, zc->zc_name); error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds, ZFS_DELEG_PERM_SEND, cr); dsl_dataset_rele(ds, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } @@ -820,12 +816,21 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) return (EINVAL); for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nextpair) { + dsl_pool_t *dp; dsl_dataset_t *ds; + error = dsl_pool_hold(nvpair_name(pair), FTAG, &dp); + if (error != 0) + break; nextpair = nvlist_next_nvpair(snaps, pair); - error = dsl_dataset_hold(nvpair_name(pair), FTAG, &ds); - if (error == 0) { + error = dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds); + if (error == 0) dsl_dataset_rele(ds, FTAG); + dsl_pool_rele(dp, FTAG); + + if (error == 0) { + error = zfs_secpolicy_destroy_perms(nvpair_name(pair), + cr); } else if (error 
== ENOENT) { /* * Ignore any snapshots that don't exist (we consider @@ -837,11 +842,7 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) */ fnvlist_remove_nvpair(snaps, pair); error = 0; - continue; - } else { - break; } - error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr); if (error != 0) break; } @@ -889,41 +890,47 @@ zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) static int zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - char parentname[MAXNAMELEN]; - objset_t *clone; + dsl_pool_t *dp; + dsl_dataset_t *clone; int error; error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_PROMOTE, cr); - if (error) + if (error != 0) + return (error); + + error = dsl_pool_hold(zc->zc_name, FTAG, &dp); + if (error != 0) return (error); - error = dmu_objset_hold(zc->zc_name, FTAG, &clone); + error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone); if (error == 0) { - dsl_dataset_t *pclone = NULL; + char parentname[MAXNAMELEN]; + dsl_dataset_t *origin = NULL; dsl_dir_t *dd; - dd = clone->os_dsl_dataset->ds_dir; + dd = clone->ds_dir; - rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dd->dd_pool, - dd->dd_phys->dd_origin_obj, FTAG, &pclone); - rw_exit(&dd->dd_pool->dp_config_rwlock); - if (error) { - dmu_objset_rele(clone, FTAG); + dd->dd_phys->dd_origin_obj, FTAG, &origin); + if (error != 0) { + dsl_dataset_rele(clone, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } - error = zfs_secpolicy_write_perms(zc->zc_name, + error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone, ZFS_DELEG_PERM_MOUNT, cr); - dsl_dataset_name(pclone, parentname); - dmu_objset_rele(clone, FTAG); - dsl_dataset_rele(pclone, FTAG); - if (error == 0) - error = zfs_secpolicy_write_perms(parentname, + dsl_dataset_name(origin, parentname); + if (error == 0) { + error = zfs_secpolicy_write_perms_ds(parentname, origin, ZFS_DELEG_PERM_PROMOTE, cr); + } + dsl_dataset_rele(clone, FTAG); + dsl_dataset_rele(origin, FTAG); } + dsl_pool_rele(dp, FTAG); return (error); } @@ -1132,16 +1139,47 @@ zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) static int zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - return (zfs_secpolicy_write_perms(zc->zc_name, - ZFS_DELEG_PERM_HOLD, cr)); + nvpair_t *pair; + nvlist_t *holds; + int error; + + error = nvlist_lookup_nvlist(innvl, "holds", &holds); + if (error != 0) + return (EINVAL); + + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(holds, pair)) { + char fsname[MAXNAMELEN]; + error = dmu_fsname(nvpair_name(pair), fsname); + if (error != 0) + return (error); + error = zfs_secpolicy_write_perms(fsname, + ZFS_DELEG_PERM_HOLD, cr); + if (error != 0) + return (error); + } + return (0); } /* ARGSUSED */ static int zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - return (zfs_secpolicy_write_perms(zc->zc_name, - ZFS_DELEG_PERM_RELEASE, cr)); + nvpair_t *pair; + int error; + + for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL; + pair = nvlist_next_nvpair(innvl, pair)) { + char fsname[MAXNAMELEN]; + error = dmu_fsname(nvpair_name(pair), fsname); + if (error != 0) + return (error); + error = zfs_secpolicy_write_perms(fsname, + ZFS_DELEG_PERM_RELEASE, cr); + if (error != 0) + return (error); + } + return (0); } /* @@ -1162,11 +1200,11 @@ zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) return (0); error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr); - if (!error) + if 
(error == 0) error = zfs_secpolicy_hold(zc, innvl, cr); - if (!error) + if (error == 0) error = zfs_secpolicy_release(zc, innvl, cr); - if (!error) + if (error == 0) error = zfs_secpolicy_destroy(zc, innvl, cr); return (error); } @@ -1276,7 +1314,7 @@ get_zfs_sb(const char *dsname, zfs_sb_t **zsbp) int error; error = dmu_objset_hold(dsname, FTAG, &os); - if (error) + if (error != 0) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); @@ -1379,7 +1417,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); error = zfs_fill_zplprops_root(version, rootprops, zplprops, NULL); - if (error) + if (error != 0) goto pool_props_bad; } @@ -1652,12 +1690,7 @@ zfs_ioc_pool_reguid(zfs_cmd_t *zc) static int zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc) { - int error; - - if ((error = dsl_dsobj_to_dsname(zc->zc_name,zc->zc_obj,zc->zc_value))) - return (error); - - return (0); + return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value)); } /* @@ -1974,15 +2007,14 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os) static int zfs_ioc_objset_stats(zfs_cmd_t *zc) { - objset_t *os = NULL; + objset_t *os; int error; - if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) - return (error); - - error = zfs_ioc_objset_stats_impl(zc, os); - - dmu_objset_rele(os, FTAG); + error = dmu_objset_hold(zc->zc_name, FTAG, &os); + if (error == 0) { + error = zfs_ioc_objset_stats_impl(zc, os); + dmu_objset_rele(os, FTAG); + } return (error); } @@ -2003,30 +2035,23 @@ zfs_ioc_objset_stats(zfs_cmd_t *zc) static int zfs_ioc_objset_recvd_props(zfs_cmd_t *zc) { - objset_t *os = NULL; - int error; + int error = 0; nvlist_t *nv; - if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) - return (error); - /* * Without this check, we would return local property values if the * caller has not already received properties on or after * SPA_VERSION_RECVD_PROPS. */ - if (!dsl_prop_get_hasrecvd(os)) { - dmu_objset_rele(os, FTAG); + if (!dsl_prop_get_hasrecvd(zc->zc_name)) return (ENOTSUP); - } if (zc->zc_nvlist_dst != 0 && - (error = dsl_prop_get_received(os, &nv)) == 0) { + (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) { error = put_nvlist(zc, nv); nvlist_free(nv); } - dmu_objset_rele(os, FTAG); return (error); } @@ -2141,20 +2166,6 @@ zfs_ioc_dataset_list_next(zfs_cmd_t *zc) (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name)); p = zc->zc_name + strlen(zc->zc_name); - /* - * Pre-fetch the datasets. dmu_objset_prefetch() always returns 0 - * but is not declared void because its called by dmu_objset_find(). - */ - if (zc->zc_cookie == 0) { - uint64_t cookie = 0; - int len = sizeof (zc->zc_name) - (p - zc->zc_name); - - while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) { - if (!dataset_name_hidden(zc->zc_name)) - (void) dmu_objset_prefetch(zc->zc_name, NULL); - } - } - do { error = dmu_dir_list_next(os, sizeof (zc->zc_name) - (p - zc->zc_name), p, @@ -2197,14 +2208,10 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) objset_t *os; int error; -top: - if (zc->zc_cookie == 0 && !zc->zc_simple) - (void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch, - NULL, DS_FIND_SNAPSHOTS); - error = dmu_objset_hold(zc->zc_name, FTAG, &os); - if (error) + if (error != 0) { return (error == ENOENT ? 
ESRCH : error); + } /* * A dataset name of maximum length cannot have any snapshots, @@ -2224,24 +2231,8 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) dsl_dataset_t *ds; dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; - /* - * Since we probably don't have a hold on this snapshot, - * it's possible that the objsetid could have been destroyed - * and reused for a new objset. It's OK if this happens during - * a zfs send operation, since the new createtxg will be - * beyond the range we're interested in. - */ - rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds); - rw_exit(&dp->dp_config_rwlock); - if (error) { - if (error == ENOENT) { - /* Racing with destroy, get the next one. */ - *strchr(zc->zc_name, '@') = '\0'; - dmu_objset_rele(os, FTAG); - goto top; - } - } else { + if (error == 0) { objset_t *ossnap; error = dmu_objset_from_ds(ds, &ossnap); @@ -2255,7 +2246,7 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) dmu_objset_rele(os, FTAG); /* if we failed, undo the @ that we tacked on to zc_name */ - if (error) + if (error != 0) *strchr(zc->zc_name, '@') = '\0'; return (error); } @@ -2345,13 +2336,13 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, err = dsl_dir_set_quota(dsname, source, intval); break; case ZFS_PROP_REFQUOTA: - err = dsl_dataset_set_quota(dsname, source, intval); + err = dsl_dataset_set_refquota(dsname, source, intval); break; case ZFS_PROP_RESERVATION: err = dsl_dir_set_reservation(dsname, source, intval); break; case ZFS_PROP_REFRESERVATION: - err = dsl_dataset_set_reservation(dsname, source, intval); + err = dsl_dataset_set_refreservation(dsname, source, intval); break; case ZFS_PROP_VOLSIZE: err = zvol_set_volsize(dsname, intval); @@ -2386,19 +2377,16 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, zfeature_info_t *feature = &spa_feature_table[SPA_FEATURE_LZ4_COMPRESS]; spa_t *spa; - dsl_pool_t *dp; if ((err = spa_open(dsname, &spa, FTAG)) != 0) return (err); - dp = spa->spa_dsl_pool; - /* * Setting the LZ4 compression algorithm activates * the feature. */ if (!spa_feature_is_active(spa, feature)) { - if ((err = zfs_prop_activate_feature(dp, + if ((err = zfs_prop_activate_feature(spa, feature)) != 0) { spa_close(spa, FTAG); return (err); @@ -2557,12 +2545,12 @@ zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, if (nvpair_type(propval) == DATA_TYPE_STRING) { strval = fnvpair_value_string(propval); - err = dsl_prop_set(dsname, propname, source, 1, - strlen(strval) + 1, strval); + err = dsl_prop_set_string(dsname, propname, + source, strval); } else { intval = fnvpair_value_uint64(propval); - err = dsl_prop_set(dsname, propname, source, 8, - 1, &intval); + err = dsl_prop_set_int(dsname, propname, source, + intval); } if (err != 0) { @@ -2628,7 +2616,7 @@ props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops) } static int -clear_received_props(objset_t *os, const char *fs, nvlist_t *props, +clear_received_props(const char *dsname, nvlist_t *props, nvlist_t *skipped) { int err = 0; @@ -2640,8 +2628,8 @@ clear_received_props(objset_t *os, const char *fs, nvlist_t *props, * properties at least once on or after SPA_VERSION_RECVD_PROPS. */ zprop_source_t flags = (ZPROP_SRC_NONE | - (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0)); - err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL); + (dsl_prop_get_hasrecvd(dsname) ? 
ZPROP_SRC_RECEIVED : 0)); + err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL); } nvlist_free(cleared_props); return (err); @@ -2673,22 +2661,19 @@ zfs_ioc_set_prop(zfs_cmd_t *zc) if (received) { nvlist_t *origprops; - objset_t *os; - - if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) { - if (dsl_prop_get_received(os, &origprops) == 0) { - (void) clear_received_props(os, - zc->zc_name, origprops, nvl); - nvlist_free(origprops); - } - dsl_prop_set_hasrecvd(os); - dmu_objset_rele(os, FTAG); + if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) { + (void) clear_received_props(zc->zc_name, + origprops, nvl); + nvlist_free(origprops); } + + error = dsl_prop_set_hasrecvd(zc->zc_name); } errors = fnvlist_alloc(); - error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors); + if (error == 0) + error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors); if (zc->zc_nvlist_dst != 0 && errors != NULL) { (void) put_nvlist(zc, errors); @@ -2771,7 +2756,7 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc) } /* property name has been validated by zfs_secpolicy_inherit_prop() */ - return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL)); + return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source)); } static int @@ -2907,7 +2892,7 @@ zfs_ioc_set_fsacl(zfs_cmd_t *zc) */ error = secpolicy_zfs(CRED()); - if (error) { + if (error != 0) { if (zc->zc_perm_action == B_FALSE) { error = dsl_deleg_can_allow(zc->zc_name, fsaclnv, CRED()); @@ -3214,7 +3199,7 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, nvprops, outnvl); if (error != 0) - (void) dmu_objset_destroy(fsname, B_FALSE); + (void) dsl_destroy_head(fsname); } return (error); } @@ -3234,7 +3219,6 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) int error = 0; nvlist_t *nvprops = NULL; char *origin_name; - dsl_dataset_t *origin; if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0) return (EINVAL); @@ -3246,14 +3230,8 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) if (dataset_namecheck(origin_name, NULL, NULL) != 0) return (EINVAL); - - error = dsl_dataset_hold(origin_name, FTAG, &origin); - if (error) - return (error); - - error = dmu_objset_clone(fsname, origin, 0); - dsl_dataset_rele(origin, FTAG); - if (error) + error = dmu_objset_clone(fsname, origin_name); + if (error != 0) return (error); /* @@ -3263,7 +3241,7 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, nvprops, outnvl); if (error != 0) - (void) dmu_objset_destroy(fsname, B_FALSE); + (void) dsl_destroy_head(fsname); } return (error); } @@ -3275,7 +3253,6 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) * } * * outnvl: snapshot -> error code (int32) - * */ static int zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) @@ -3325,7 +3302,7 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) } } - error = dmu_objset_snapshot(snaps, props, outnvl); + error = dsl_dataset_snapshot(snaps, props, outnvl); return (error); } @@ -3371,43 +3348,71 @@ zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) } /* - * inputs: - * name dataset name, or when 'arg == NULL' the full snapshot name - * arg short snapshot name (i.e. part after the '@') + * The dp_config_rwlock must not be held when calling this, because the + * unmount may need to write out data. + * + * This function is best-effort. 
Callers must deal gracefully if it + * remains mounted (or is remounted after this call). */ -/* ARGSUSED */ -int -zfs_unmount_snap(const char *name, void *arg) +void +zfs_unmount_snap(const char *snapname) { zfs_sb_t *zsb = NULL; char *dsname; - char *snapname; char *fullname; char *ptr; - int error; - if ((ptr = strchr(name, '@')) == NULL) - return (0); + if ((ptr = strchr(snapname, '@')) == NULL) + return; - dsname = strdup(name); - dsname[ptr - name] = '\0'; + dsname = strdup(snapname); + dsname[ptr - snapname] = '\0'; snapname = strdup(ptr + 1); fullname = kmem_asprintf("%s@%s", dsname, snapname); - error = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE); - if (error == 0) { - error = zfsctl_unmount_snapshot(zsb, fullname, MNT_FORCE); + if (zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE) == 0) { + ASSERT(!dsl_pool_config_held(dmu_objset_pool(zsb->z_os))); + (void) zfsctl_unmount_snapshot(zsb, fullname, MNT_FORCE); zfs_sb_rele(zsb, FTAG); - - /* Allow ENOENT for consistency with upstream */ - if (error == ENOENT) - error = 0; } strfree(dsname); - strfree(snapname); strfree(fullname); - return (error); + return; +} + +/* ARGSUSED */ +static int +zfs_unmount_snap_cb(const char *snapname, void *arg) +{ + zfs_unmount_snap(snapname); + return (0); +} + +/* + * When a clone is destroyed, its origin may also need to be destroyed, + * in which case it must be unmounted. This routine will do that unmount + * if necessary. + */ +void +zfs_destroy_unmount_origin(const char *fsname) +{ + int error; + objset_t *os; + dsl_dataset_t *ds; + + error = dmu_objset_hold(fsname, FTAG, &os); + if (error != 0) + return; + ds = dmu_objset_ds(os); + if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) { + char originname[MAXNAMELEN]; + dsl_dataset_name(ds->ds_prev, originname); + dmu_objset_rele(os, FTAG); + zfs_unmount_snap(originname); + } else { + dmu_objset_rele(os, FTAG); + } } /* @@ -3442,15 +3447,11 @@ zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) (name[poollen] != '/' && name[poollen] != '@')) return (EXDEV); - /* - * Ignore failures to unmount; dmu_snapshots_destroy_nvl() - * will deal with this gracefully (by filling in outnvl). 
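[Illustrative aside, not a hunk of this patch: zfs_unmount_snap() is now void and best-effort, and recursive unmounting of a filesystem's snapshots goes through the dmu_objset_find() callback, as zfs_ioc_promote() does later in this file. A minimal sketch, with an example dataset name:]

    char fsname[MAXNAMELEN] = "tank/fs";    /* example name only */

    /* Unmount every snapshot under fsname before a destructive operation. */
    (void) dmu_objset_find(fsname, zfs_unmount_snap_cb, NULL,
        DS_FIND_SNAPSHOTS);
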
- */ - (void) zfs_unmount_snap(name, NULL); + zfs_unmount_snap(name); (void) zvol_remove_minor(name); } - return (dmu_snapshots_destroy_nvl(snaps, defer, outnvl)); + return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl)); } /* @@ -3465,13 +3466,13 @@ static int zfs_ioc_destroy(zfs_cmd_t *zc) { int err; - if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) { - err = zfs_unmount_snap(zc->zc_name, NULL); - if (err) - return (err); - } + if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) + zfs_unmount_snap(zc->zc_name); - err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy); + if (strchr(zc->zc_name, '@')) + err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy); + else + err = dsl_destroy_head(zc->zc_name); if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0) (void) zvol_remove_minor(zc->zc_name); return (err); @@ -3486,79 +3487,35 @@ zfs_ioc_destroy(zfs_cmd_t *zc) static int zfs_ioc_rollback(zfs_cmd_t *zc) { - dsl_dataset_t *ds, *clone; - int error; zfs_sb_t *zsb; - char *clone_name; - - error = dsl_dataset_hold(zc->zc_name, FTAG, &ds); - if (error) - return (error); - - /* must not be a snapshot */ - if (dsl_dataset_is_snapshot(ds)) { - dsl_dataset_rele(ds, FTAG); - return (EINVAL); - } - - /* must have a most recent snapshot */ - if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { - dsl_dataset_rele(ds, FTAG); - return (EINVAL); - } - - /* - * Create clone of most recent snapshot. - */ - clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name); - error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT); - if (error) - goto out; - - error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone); - if (error) - goto out; + int error; - /* - * Do clone swap. - */ if (get_zfs_sb(zc->zc_name, &zsb) == 0) { error = zfs_suspend_fs(zsb); if (error == 0) { int resume_err; - if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { - error = dsl_dataset_clone_swap(clone, ds, - B_TRUE); - dsl_dataset_disown(ds, FTAG); - ds = NULL; - } else { - error = EBUSY; - } + error = dsl_dataset_rollback(zc->zc_name); resume_err = zfs_resume_fs(zsb, zc->zc_name); error = error ? error : resume_err; } deactivate_super(zsb->z_sb); } else { - if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { - error = dsl_dataset_clone_swap(clone, ds, B_TRUE); - dsl_dataset_disown(ds, FTAG); - ds = NULL; - } else { - error = EBUSY; - } + error = dsl_dataset_rollback(zc->zc_name); } + return (error); +} - /* - * Destroy clone (which also closes it). - */ - (void) dsl_dataset_destroy(clone, FTAG, B_FALSE); +static int +recursive_unmount(const char *fsname, void *arg) +{ + const char *snapname = arg; + char *fullname; -out: - strfree(clone_name); - if (ds) - dsl_dataset_rele(ds, FTAG); - return (error); + fullname = kmem_asprintf("%s@%s", fsname, snapname); + zfs_unmount_snap(fullname); + strfree(fullname); + return (0); } /* @@ -3573,6 +3530,7 @@ static int zfs_ioc_rename(zfs_cmd_t *zc) { boolean_t recursive = zc->zc_cookie & 1; + char *at; int err; zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; @@ -3580,25 +3538,29 @@ zfs_ioc_rename(zfs_cmd_t *zc) strchr(zc->zc_value, '%')) return (EINVAL); - /* - * Unmount snapshot unless we're doing a recursive rename, - * in which case the dataset code figures out which snapshots - * to unmount. 
- */ - if (!recursive && strchr(zc->zc_name, '@') != NULL && - zc->zc_objset_type == DMU_OST_ZFS) { - err = zfs_unmount_snap(zc->zc_name, NULL); - if (err) - return (err); - } - - err = dmu_objset_rename(zc->zc_name, zc->zc_value, recursive); - if ((err == 0) && (zc->zc_objset_type == DMU_OST_ZVOL)) { - (void) zvol_remove_minor(zc->zc_name); - (void) zvol_create_minor(zc->zc_value); + at = strchr(zc->zc_name, '@'); + if (at != NULL) { + /* snaps must be in same fs */ + if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1)) + return (EXDEV); + *at = '\0'; + if (zc->zc_objset_type == DMU_OST_ZFS) { + int error = dmu_objset_find(zc->zc_name, + recursive_unmount, at + 1, + recursive ? DS_FIND_CHILDREN : 0); + if (error != 0) + return (error); + } + return (dsl_dataset_rename_snapshot(zc->zc_name, + at + 1, strchr(zc->zc_value, '@') + 1, recursive)); + } else { + err = dsl_dir_rename(zc->zc_name, zc->zc_value); + if (!err && zc->zc_objset_type == DMU_OST_ZVOL) { + (void) zvol_remove_minor(zc->zc_name); + (void) zvol_create_minor(zc->zc_value); + } + return (err); } - - return (err); } static int @@ -3743,36 +3705,15 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) return (zfs_secpolicy_setprop(dsname, prop, pair, CRED())); } -/* - * Activates a feature on a pool in response to a property setting. This - * creates a new sync task which modifies the pool to reflect the feature - * as being active. - */ -static int -zfs_prop_activate_feature(dsl_pool_t *dp, zfeature_info_t *feature) -{ - int err; - - /* EBUSY here indicates that the feature is already active */ - err = dsl_sync_task_do(dp, zfs_prop_activate_feature_check, - zfs_prop_activate_feature_sync, dp->dp_spa, feature, 2); - - if (err != 0 && err != EBUSY) - return (err); - else - return (0); -} - /* * Checks for a race condition to make sure we don't increment a feature flag * multiple times. */ -/*ARGSUSED*/ static int -zfs_prop_activate_feature_check(void *arg1, void *arg2, dmu_tx_t *tx) +zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - zfeature_info_t *feature = arg2; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; + zfeature_info_t *feature = arg; if (!spa_feature_is_active(spa, feature)) return (0); @@ -3785,14 +3726,35 @@ zfs_prop_activate_feature_check(void *arg1, void *arg2, dmu_tx_t *tx) * zfs_prop_activate_feature. */ static void -zfs_prop_activate_feature_sync(void *arg1, void *arg2, dmu_tx_t *tx) +zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx) { - spa_t *spa = arg1; - zfeature_info_t *feature = arg2; + spa_t *spa = dmu_tx_pool(tx)->dp_spa; + zfeature_info_t *feature = arg; spa_feature_incr(spa, feature, tx); } +/* + * Activates a feature on a pool in response to a property setting. This + * creates a new sync task which modifies the pool to reflect the feature + * as being active. + */ +static int +zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature) +{ + int err; + + /* EBUSY here indicates that the feature is already active */ + err = dsl_sync_task(spa_name(spa), + zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync, + feature, 2); + + if (err != 0 && err != EBUSY) + return (err); + else + return (0); +} + /* * Removes properties from the given props list that fail permission checks * needed to clear them and to restore them in case of a receive error. 
For each @@ -3947,7 +3909,6 @@ static int zfs_ioc_recv(zfs_cmd_t *zc) { file_t *fp; - objset_t *os; dmu_recv_cookie_t drc; boolean_t force = (boolean_t)zc->zc_guid; int fd; @@ -3957,7 +3918,7 @@ zfs_ioc_recv(zfs_cmd_t *zc) offset_t off; nvlist_t *props = NULL; /* sent properties */ nvlist_t *origprops = NULL; /* existing properties */ - objset_t *origin = NULL; + char *origin = NULL; char *tosnap; char tofs[ZFS_MAXNAMELEN]; boolean_t first_recvd_props = B_FALSE; @@ -3985,18 +3946,31 @@ zfs_ioc_recv(zfs_cmd_t *zc) VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); - if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) { - if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) && - !dsl_prop_get_hasrecvd(os)) { + if (zc->zc_string[0]) + origin = zc->zc_string; + + error = dmu_recv_begin(tofs, tosnap, + &zc->zc_begin_record, force, origin, &drc); + if (error != 0) + goto out; + + /* + * Set properties before we receive the stream so that they are applied + * to the new data. Note that we must call dmu_recv_stream() if + * dmu_recv_begin() succeeds. + */ + if (props != NULL && !drc.drc_newfs) { + if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >= + SPA_VERSION_RECVD_PROPS && + !dsl_prop_get_hasrecvd(tofs)) first_recvd_props = B_TRUE; - } /* * If new received properties are supplied, they are to * completely replace the existing received properties, so stash * away the existing ones. */ - if (dsl_prop_get_received(os, &origprops) == 0) { + if (dsl_prop_get_received(tofs, &origprops) == 0) { nvlist_t *errlist = NULL; /* * Don't bother writing a property if its value won't @@ -4008,53 +3982,25 @@ zfs_ioc_recv(zfs_cmd_t *zc) */ if (!first_recvd_props) props_reduce(props, origprops); - if (zfs_check_clearable(tofs, origprops, - &errlist) != 0) + if (zfs_check_clearable(tofs, origprops, &errlist) != 0) (void) nvlist_merge(errors, errlist, 0); nvlist_free(errlist); - } - dmu_objset_rele(os, FTAG); - } - - if (zc->zc_string[0]) { - error = dmu_objset_hold(zc->zc_string, FTAG, &origin); - if (error) - goto out; - } - - error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds, - &zc->zc_begin_record, force, origin, &drc); - if (origin) - dmu_objset_rele(origin, FTAG); - if (error) - goto out; - - /* - * Set properties before we receive the stream so that they are applied - * to the new data. Note that we must call dmu_recv_stream() if - * dmu_recv_begin() succeeds. - */ - if (props) { - if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) { - if (drc.drc_newfs) { - if (spa_version(os->os_spa) >= - SPA_VERSION_RECVD_PROPS) - first_recvd_props = B_TRUE; - } else if (origprops != NULL) { - if (clear_received_props(os, tofs, origprops, - first_recvd_props ? NULL : props) != 0) - zc->zc_obj |= ZPROP_ERR_NOCLEAR; - } else { + if (clear_received_props(tofs, origprops, + first_recvd_props ? NULL : props) != 0) zc->zc_obj |= ZPROP_ERR_NOCLEAR; - } - dsl_prop_set_hasrecvd(os); - } else if (!drc.drc_newfs) { + } else { zc->zc_obj |= ZPROP_ERR_NOCLEAR; } + } + + if (props != NULL) { + props_error = dsl_prop_set_hasrecvd(tofs); - (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, - props, errors); + if (props_error == 0) { + (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, + props, errors); + } } if (zc->zc_nvlist_dst_size != 0 && @@ -4106,22 +4052,16 @@ zfs_ioc_recv(zfs_cmd_t *zc) /* * On error, restore the original props. 
*/ - if (error && props) { - if (dmu_objset_hold(tofs, FTAG, &os) == 0) { - if (clear_received_props(os, tofs, props, NULL) != 0) { - /* - * We failed to clear the received properties. - * Since we may have left a $recvd value on the - * system, we can't clear the $hasrecvd flag. - */ - zc->zc_obj |= ZPROP_ERR_NORESTORE; - } else if (first_recvd_props) { - dsl_prop_unset_hasrecvd(os); - } - dmu_objset_rele(os, FTAG); - } else if (!drc.drc_newfs) { - /* We failed to clear the received properties. */ + if (error != 0 && props != NULL && !drc.drc_newfs) { + if (clear_received_props(tofs, props, NULL) != 0) { + /* + * We failed to clear the received properties. + * Since we may have left a $recvd value on the + * system, we can't clear the $hasrecvd flag. + */ zc->zc_obj |= ZPROP_ERR_NORESTORE; + } else if (first_recvd_props) { + dsl_prop_unset_hasrecvd(tofs); } if (origprops == NULL && !drc.drc_newfs) { @@ -4173,100 +4113,75 @@ zfs_ioc_recv(zfs_cmd_t *zc) static int zfs_ioc_send(zfs_cmd_t *zc) { - objset_t *fromsnap = NULL; - objset_t *tosnap; int error; offset_t off; - dsl_dataset_t *ds; - dsl_dataset_t *dsfrom = NULL; - spa_t *spa; - dsl_pool_t *dp; boolean_t estimate = (zc->zc_guid != 0); - error = spa_open(zc->zc_name, &spa, FTAG); - if (error) - return (error); + if (zc->zc_obj != 0) { + dsl_pool_t *dp; + dsl_dataset_t *tosnap; - dp = spa_get_dsl(spa); - rw_enter(&dp->dp_config_rwlock, RW_READER); - error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); - rw_exit(&dp->dp_config_rwlock); - spa_close(spa, FTAG); - if (error) - return (error); - - error = dmu_objset_from_ds(ds, &tosnap); - if (error) { - dsl_dataset_rele(ds, FTAG); - return (error); - } - - if (zc->zc_fromobj != 0) { - rw_enter(&dp->dp_config_rwlock, RW_READER); - error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom); - rw_exit(&dp->dp_config_rwlock); - if (error) { - dsl_dataset_rele(ds, FTAG); + error = dsl_pool_hold(zc->zc_name, FTAG, &dp); + if (error != 0) return (error); - } - error = dmu_objset_from_ds(dsfrom, &fromsnap); - if (error) { - dsl_dataset_rele(dsfrom, FTAG); - dsl_dataset_rele(ds, FTAG); + + error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap); + if (error != 0) { + dsl_pool_rele(dp, FTAG); return (error); } + + if (dsl_dir_is_clone(tosnap->ds_dir)) + zc->zc_fromobj = tosnap->ds_dir->dd_phys->dd_origin_obj; + dsl_dataset_rele(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); } - if (zc->zc_obj) { - dsl_pool_t *dp = ds->ds_dir->dd_pool; + if (estimate) { + dsl_pool_t *dp; + dsl_dataset_t *tosnap; + dsl_dataset_t *fromsnap = NULL; + + error = dsl_pool_hold(zc->zc_name, FTAG, &dp); + if (error != 0) + return (error); - if (fromsnap != NULL) { - dsl_dataset_rele(dsfrom, FTAG); - dsl_dataset_rele(ds, FTAG); - return (EINVAL); + error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + return (error); } - if (dsl_dir_is_clone(ds->ds_dir)) { - rw_enter(&dp->dp_config_rwlock, RW_READER); - error = dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &dsfrom); - rw_exit(&dp->dp_config_rwlock); - if (error) { - dsl_dataset_rele(ds, FTAG); - return (error); - } - error = dmu_objset_from_ds(dsfrom, &fromsnap); - if (error) { - dsl_dataset_rele(dsfrom, FTAG); - dsl_dataset_rele(ds, FTAG); + if (zc->zc_fromobj != 0) { + error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, + FTAG, &fromsnap); + if (error != 0) { + dsl_dataset_rele(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } } - } - if (estimate) { error = 
dmu_send_estimate(tosnap, fromsnap, &zc->zc_objset_type); + + if (fromsnap != NULL) + dsl_dataset_rele(fromsnap, FTAG); + dsl_dataset_rele(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); } else { file_t *fp = getf(zc->zc_cookie); - if (fp == NULL) { - dsl_dataset_rele(ds, FTAG); - if (dsfrom) - dsl_dataset_rele(dsfrom, FTAG); + if (fp == NULL) return (EBADF); - } off = fp->f_offset; - error = dmu_send(tosnap, fromsnap, - zc->zc_cookie, fp->f_vnode, &off); + error = dmu_send_obj(zc->zc_name, zc->zc_sendobj, + zc->zc_fromobj, zc->zc_cookie, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; releasef(zc->zc_cookie); } - if (dsfrom) - dsl_dataset_rele(dsfrom, FTAG); - dsl_dataset_rele(ds, FTAG); return (error); } @@ -4281,13 +4196,21 @@ zfs_ioc_send(zfs_cmd_t *zc) static int zfs_ioc_send_progress(zfs_cmd_t *zc) { + dsl_pool_t *dp; dsl_dataset_t *ds; dmu_sendarg_t *dsp = NULL; int error; - if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0) + error = dsl_pool_hold(zc->zc_name, FTAG, &dp); + if (error != 0) return (error); + error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + return (error); + } + mutex_enter(&ds->ds_sendstream_lock); /* @@ -4311,6 +4234,7 @@ zfs_ioc_send_progress(zfs_cmd_t *zc) mutex_exit(&ds->ds_sendstream_lock); dsl_dataset_rele(ds, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } @@ -4417,7 +4341,7 @@ zfs_ioc_clear(zfs_cmd_t *zc) } } - if (error) + if (error != 0) return (error); spa_vdev_state_enter(spa, SCL_NONE); @@ -4455,7 +4379,7 @@ zfs_ioc_pool_reopen(zfs_cmd_t *zc) int error; error = spa_open(zc->zc_name, &spa, FTAG); - if (error) + if (error != 0) return (error); spa_vdev_state_enter(spa, SCL_NONE); @@ -4495,7 +4419,7 @@ zfs_ioc_promote(zfs_cmd_t *zc) if (cp) *cp = '\0'; (void) dmu_objset_find(zc->zc_value, - zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS); + zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS); return (dsl_dataset_promote(zc->zc_name, zc->zc_string)); } @@ -4521,7 +4445,7 @@ zfs_ioc_userspace_one(zfs_cmd_t *zc) return (EINVAL); error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE); - if (error) + if (error != 0) return (error); error = zfs_userspace_one(zsb, @@ -4554,7 +4478,7 @@ zfs_ioc_userspace_many(zfs_cmd_t *zc) return (ENOMEM); error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE); - if (error) + if (error != 0) return (error); buf = vmem_alloc(bufsize, KM_SLEEP); @@ -4604,7 +4528,7 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) } else { /* XXX kind of reading contents without owning */ error = dmu_objset_hold(zc->zc_name, FTAG, &os); - if (error) + if (error != 0) return (error); error = dmu_objset_userspace_upgrade(os); @@ -4639,7 +4563,7 @@ zfs_ioc_next_obj(zfs_cmd_t *zc) int error; error = dmu_objset_hold(zc->zc_name, FTAG, &os); - if (error) + if (error != 0) return (error); error = dmu_object_next(os, &zc->zc_obj, B_FALSE, @@ -4662,25 +4586,26 @@ static int zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) { char *snap_name; + char *hold_name; int error; + minor_t minor; - snap_name = kmem_asprintf("%s@%s-%016llx", zc->zc_name, zc->zc_value, - (u_longlong_t)ddi_get_lbolt64()); - - if (strlen(snap_name) >= MAXPATHLEN) { - strfree(snap_name); - return (E2BIG); - } - - error = dmu_objset_snapshot_tmp(snap_name, "%temp", zc->zc_cleanup_fd); - if (error != 0) { - strfree(snap_name); + error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor); + if (error != 0) return (error); - } - (void) strcpy(zc->zc_value, strchr(snap_name, '@') + 1); + snap_name = 
kmem_asprintf("%s-%016llx", zc->zc_value, + (u_longlong_t)ddi_get_lbolt64()); + hold_name = kmem_asprintf("%%%s", zc->zc_value); + + error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor, + hold_name); + if (error == 0) + (void) strcpy(zc->zc_value, snap_name); strfree(snap_name); - return (0); + strfree(hold_name); + zfs_onexit_fd_rele(zc->zc_cleanup_fd); + return (error); } /* @@ -4695,39 +4620,22 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) static int zfs_ioc_diff(zfs_cmd_t *zc) { - objset_t *fromsnap; - objset_t *tosnap; file_t *fp; offset_t off; int error; - error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap); - if (error) - return (error); - - error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap); - if (error) { - dmu_objset_rele(tosnap, FTAG); - return (error); - } - fp = getf(zc->zc_cookie); - if (fp == NULL) { - dmu_objset_rele(fromsnap, FTAG); - dmu_objset_rele(tosnap, FTAG); + if (fp == NULL) return (EBADF); - } off = fp->f_offset; - error = dmu_diff(tosnap, fromsnap, fp->f_vnode, &off); + error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; releasef(zc->zc_cookie); - dmu_objset_rele(fromsnap, FTAG); - dmu_objset_rele(tosnap, FTAG); return (error); } @@ -4799,13 +4707,13 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc) ZFS_SHARES_DIR); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { + if (error != 0) { dmu_tx_abort(tx); } else { error = zfs_create_share_dir(zsb, tx); dmu_tx_commit(tx); } - if (error) { + if (error != 0) { mutex_exit(&zsb->z_lock); VN_RELE(vp); ZFS_EXIT(zsb); @@ -4886,124 +4794,82 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc) } /* - * inputs: - * zc_name name of filesystem - * zc_value short name of snap - * zc_string user-supplied tag for this hold - * zc_cookie recursive flag - * zc_temphold set if hold is temporary - * zc_cleanup_fd cleanup-on-exit file descriptor for calling process - * zc_sendobj if non-zero, the objid for zc_name@zc_value - * zc_createtxg if zc_sendobj is non-zero, snap must have zc_createtxg + * innvl: { + * "holds" -> { snapname -> holdname (string), ... } + * (optional) "cleanup_fd" -> fd (int32) + * } * - * outputs: none + * outnvl: { + * snapname -> error value (int32) + * ... + * } */ +/* ARGSUSED */ static int -zfs_ioc_hold(zfs_cmd_t *zc) +zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist) { - boolean_t recursive = zc->zc_cookie; - spa_t *spa; - dsl_pool_t *dp; - dsl_dataset_t *ds; + nvlist_t *holds; + int cleanup_fd = -1; int error; minor_t minor = 0; - if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) - return (EINVAL); - - if (zc->zc_sendobj == 0) { - return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value, - zc->zc_string, recursive, zc->zc_temphold, - zc->zc_cleanup_fd)); - } - - if (recursive) + error = nvlist_lookup_nvlist(args, "holds", &holds); + if (error != 0) return (EINVAL); - error = spa_open(zc->zc_name, &spa, FTAG); - if (error) - return (error); - - dp = spa_get_dsl(spa); - rw_enter(&dp->dp_config_rwlock, RW_READER); - error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); - rw_exit(&dp->dp_config_rwlock); - spa_close(spa, FTAG); - if (error) - return (error); - - /* - * Until we have a hold on this snapshot, it's possible that - * zc_sendobj could've been destroyed and reused as part - * of a later txg. Make sure we're looking at the right object. 
- */ - if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) { - dsl_dataset_rele(ds, FTAG); - return (ENOENT); - } - - if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) { - error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor); - if (error) { - dsl_dataset_rele(ds, FTAG); + if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) { + error = zfs_onexit_fd_hold(cleanup_fd, &minor); + if (error != 0) return (error); - } - } - - error = dsl_dataset_user_hold_for_send(ds, zc->zc_string, - zc->zc_temphold); - if (minor != 0) { - if (error == 0) { - dsl_register_onexit_hold_cleanup(ds, zc->zc_string, - minor); - } - zfs_onexit_fd_rele(zc->zc_cleanup_fd); } - dsl_dataset_rele(ds, FTAG); + error = dsl_dataset_user_hold(holds, minor, errlist); + if (minor != 0) + zfs_onexit_fd_rele(cleanup_fd); return (error); } /* - * inputs: - * zc_name name of dataset from which we're releasing a user hold - * zc_value short name of snap - * zc_string user-supplied tag for this hold - * zc_cookie recursive flag + * innvl is not used. * - * outputs: none + * outnvl: { + * holdname -> time added (uint64 seconds since epoch) + * ... + * } */ +/* ARGSUSED */ static int -zfs_ioc_release(zfs_cmd_t *zc) +zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl) { - boolean_t recursive = zc->zc_cookie; - - if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) - return (EINVAL); - - return (dsl_dataset_user_release(zc->zc_name, zc->zc_value, - zc->zc_string, recursive)); + return (dsl_dataset_get_holds(snapname, outnvl)); } /* - * inputs: - * zc_name name of filesystem + * innvl: { + * snapname -> { holdname, ... } + * ... + * } * - * outputs: - * zc_nvlist_src{_size} nvlist of snapshot holds + * outnvl: { + * snapname -> error value (int32) + * ... + * } */ +/* ARGSUSED */ static int -zfs_ioc_get_holds(zfs_cmd_t *zc) +zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist) { - nvlist_t *nvp; - int error; + nvpair_t *pair; - if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) { - error = put_nvlist(zc, nvp); - nvlist_free(nvp); - } + /* + * The release may cause the snapshot to be destroyed; make sure it + * is not mounted. 
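[Illustrative aside, not a hunk of this patch: a minimal sketch of how a caller might build the input nvlists for the new hold/release ioctls documented above. It assumes only the libnvpair fnvlist_* helpers already used in this change; the snapshot name, tag name, and cleanup_fd are examples.]

    nvlist_t *holds, *args, *tags, *rel;
    int cleanup_fd = -1;    /* caller-supplied /dev/zfs fd; optional */

    /* Hold: innvl is { "holds" -> { snapname -> holdname }, "cleanup_fd" }. */
    holds = fnvlist_alloc();
    fnvlist_add_string(holds, "tank/fs@snap1", "my-tag");
    args = fnvlist_alloc();
    fnvlist_add_nvlist(args, "holds", holds);
    fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);    /* optional pair */
    /* ... pass args as innvl to ZFS_IOC_HOLD ... */
    fnvlist_free(holds);
    fnvlist_free(args);

    /* Release: innvl is { snapname -> { holdname, ... } }. */
    tags = fnvlist_alloc();
    fnvlist_add_boolean(tags, "my-tag");
    rel = fnvlist_alloc();
    fnvlist_add_nvlist(rel, "tank/fs@snap1", tags);
    /* ... pass rel as innvl to ZFS_IOC_RELEASE ... */
    fnvlist_free(tags);
    fnvlist_free(rel);
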
+ */ + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; + pair = nvlist_next_nvpair(holds, pair)) + zfs_unmount_snap(nvpair_name(pair)); - return (error); + return (dsl_dataset_user_release(holds, errlist)); } /* @@ -5044,7 +4910,7 @@ zfs_ioc_events_next(zfs_cmd_t *zc) break; error = zfs_zevent_wait(ze); - if (error) + if (error != 0) break; } while (1); @@ -5082,14 +4948,21 @@ static int zfs_ioc_space_written(zfs_cmd_t *zc) { int error; + dsl_pool_t *dp; dsl_dataset_t *new, *old; - error = dsl_dataset_hold(zc->zc_name, FTAG, &new); + error = dsl_pool_hold(zc->zc_name, FTAG, &dp); if (error != 0) return (error); - error = dsl_dataset_hold(zc->zc_value, FTAG, &old); + error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + return (error); + } + error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old); if (error != 0) { dsl_dataset_rele(new, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } @@ -5097,6 +4970,7 @@ zfs_ioc_space_written(zfs_cmd_t *zc) &zc->zc_objset_type, &zc->zc_perm_action); dsl_dataset_rele(old, FTAG); dsl_dataset_rele(new, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } @@ -5115,6 +4989,7 @@ static int zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) { int error; + dsl_pool_t *dp; dsl_dataset_t *new, *old; char *firstsnap; uint64_t used, comp, uncomp; @@ -5122,18 +4997,26 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0) return (EINVAL); - error = dsl_dataset_hold(lastsnap, FTAG, &new); + error = dsl_pool_hold(lastsnap, FTAG, &dp); if (error != 0) return (error); - error = dsl_dataset_hold(firstsnap, FTAG, &old); + + error = dsl_dataset_hold(dp, lastsnap, FTAG, &new); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + return (error); + } + error = dsl_dataset_hold(dp, firstsnap, FTAG, &old); if (error != 0) { dsl_dataset_rele(new, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp); dsl_dataset_rele(old, FTAG); dsl_dataset_rele(new, FTAG); + dsl_pool_rele(dp, FTAG); fnvlist_add_uint64(outnvl, "used", used); fnvlist_add_uint64(outnvl, "compressed", comp); fnvlist_add_uint64(outnvl, "uncompressed", uncomp); @@ -5152,49 +5035,28 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) static int zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) { - objset_t *fromsnap = NULL; - objset_t *tosnap; int error; offset_t off; - char *fromname; + char *fromname = NULL; int fd; + file_t *fp; error = nvlist_lookup_int32(innvl, "fd", &fd); if (error != 0) return (EINVAL); - error = dmu_objset_hold(snapname, FTAG, &tosnap); - if (error) - return (error); + (void) nvlist_lookup_string(innvl, "fromsnap", &fromname); - error = nvlist_lookup_string(innvl, "fromsnap", &fromname); - if (error == 0) { - error = dmu_objset_hold(fromname, FTAG, &fromsnap); - if (error) { - dmu_objset_rele(tosnap, FTAG); - return (error); - } - } - - { - file_t *fp = getf(fd); - if (fp == NULL) { - dmu_objset_rele(tosnap, FTAG); - if (fromsnap != NULL) - dmu_objset_rele(fromsnap, FTAG); + if ((fp = getf(fd)) == NULL) return (EBADF); - } off = fp->f_offset; - error = dmu_send(tosnap, fromsnap, fd, fp->f_vnode, &off); + error = dmu_send(snapname, fromname, fd, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; - } + releasef(fd); - if (fromsnap != NULL) - 
dmu_objset_rele(fromsnap, FTAG); - dmu_objset_rele(tosnap, FTAG); return (error); } @@ -5213,21 +5075,29 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) static int zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) { - objset_t *fromsnap = NULL; - objset_t *tosnap; + dsl_pool_t *dp; + dsl_dataset_t *fromsnap = NULL; + dsl_dataset_t *tosnap; int error; char *fromname; uint64_t space; - error = dmu_objset_hold(snapname, FTAG, &tosnap); - if (error) + error = dsl_pool_hold(snapname, FTAG, &dp); + if (error != 0) return (error); + error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap); + if (error != 0) { + dsl_pool_rele(dp, FTAG); + return (error); + } + error = nvlist_lookup_string(innvl, "fromsnap", &fromname); if (error == 0) { - error = dmu_objset_hold(fromname, FTAG, &fromsnap); - if (error) { - dmu_objset_rele(tosnap, FTAG); + error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap); + if (error != 0) { + dsl_dataset_rele(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } } @@ -5236,8 +5106,9 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) fnvlist_add_uint64(outnvl, "space", space); if (fromsnap != NULL) - dmu_objset_rele(fromsnap, FTAG); - dmu_objset_rele(tosnap, FTAG); + dsl_dataset_rele(fromsnap, FTAG); + dsl_dataset_rele(tosnap, FTAG); + dsl_pool_rele(dp, FTAG); return (error); } @@ -5382,6 +5253,17 @@ zfs_ioctl_init(void) zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + zfs_ioctl_register("hold", ZFS_IOC_HOLD, + zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + zfs_ioctl_register("release", ZFS_IOC_RELEASE, + zfs_ioc_release, zfs_secpolicy_release, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS, + zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, @@ -5459,8 +5341,6 @@ zfs_ioctl_init(void) zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN, zfs_ioc_space_written); - zfs_ioctl_register_dataset_read(ZFS_IOC_GET_HOLDS, - zfs_ioc_get_holds); zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS, zfs_ioc_objset_recvd_props); zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ, @@ -5503,10 +5383,6 @@ zfs_ioctl_init(void) zfs_secpolicy_recv); zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote, zfs_secpolicy_promote); - zfs_ioctl_register_dataset_modify(ZFS_IOC_HOLD, zfs_ioc_hold, - zfs_secpolicy_hold); - zfs_ioctl_register_dataset_modify(ZFS_IOC_RELEASE, zfs_ioc_release, - zfs_secpolicy_release); zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP, zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop); zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl, @@ -5866,7 +5742,7 @@ zfs_attach(void) offsetof(zfsdev_state_t, zs_next)); error = misc_register(&zfs_misc); - if (error) { + if (error != 0) { printk(KERN_INFO "ZFS: misc_register() failed %d\n", error); return (error); } @@ -5880,7 +5756,7 @@ zfs_detach(void) int error; error = misc_deregister(&zfs_misc); - if (error) + if (error != 0) printk(KERN_INFO "ZFS: misc_deregister() failed %d\n", error); mutex_destroy(&zfsdev_state_lock); diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index 8fee441b14e5..eeac0391cb05 
100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -248,28 +248,31 @@ zfs_register_callbacks(zfs_sb_t *zsb) * overboard... */ ds = dmu_objset_ds(os); + dsl_pool_config_enter(dmu_objset_pool(os), FTAG); error = dsl_prop_register(ds, - "atime", atime_changed_cb, zsb); + zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "xattr", xattr_changed_cb, zsb); + zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "recordsize", blksz_changed_cb, zsb); + zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "readonly", readonly_changed_cb, zsb); + zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "devices", devices_changed_cb, zsb); + zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "setuid", setuid_changed_cb, zsb); + zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "exec", exec_changed_cb, zsb); + zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "snapdir", snapdir_changed_cb, zsb); + zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "aclinherit", acl_inherit_changed_cb, zsb); + zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, + zsb); error = error ? error : dsl_prop_register(ds, - "vscan", vscan_changed_cb, zsb); + zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "nbmand", nbmand_changed_cb, zsb); + zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zsb); + dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (error) goto unregister; @@ -284,18 +287,28 @@ zfs_register_callbacks(zfs_sb_t *zsb) * registered, but this is OK; it will simply return ENOMSG, * which we will ignore. 
*/ - (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zsb); - (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zsb); - (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zsb); - (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zsb); - (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zsb); - (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zsb); - (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zsb); - (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zsb); - (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, - zsb); - (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zsb); - (void) dsl_prop_unregister(ds, "nbmand", nbmand_changed_cb, zsb); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME), + atime_changed_cb, zsb); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR), + xattr_changed_cb, zsb); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE), + blksz_changed_cb, zsb); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY), + readonly_changed_cb, zsb); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEVICES), + devices_changed_cb, zsb); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SETUID), + setuid_changed_cb, zsb); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_EXEC), + exec_changed_cb, zsb); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR), + snapdir_changed_cb, zsb); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT), + acl_inherit_changed_cb, zsb); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_VSCAN), + vscan_changed_cb, zsb); + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_NBMAND), + nbmand_changed_cb, zsb); return (error); } @@ -305,8 +318,6 @@ static int zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, uint64_t *userp, uint64_t *groupp) { - int error = 0; - /* * Is it a valid type of object to track? 
*/ @@ -363,7 +374,7 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, *groupp = BSWAP_64(*groupp); } } - return (error); + return (0); } static void @@ -726,7 +737,7 @@ zfs_sb_create(const char *osname, zfs_sb_t **zsbp) mutex_init(&zsb->z_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zsb->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); - rrw_init(&zsb->z_teardown_lock); + rrw_init(&zsb->z_teardown_lock, B_FALSE); rw_init(&zsb->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); rw_init(&zsb->z_fuid_lock, NULL, RW_DEFAULT, NULL); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) @@ -1138,7 +1149,7 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting) if (dsl_dataset_is_dirty(dmu_objset_ds(zsb->z_os)) && !zfs_is_readonly(zsb)) txg_wait_synced(dmu_objset_pool(zsb->z_os), 0); - (void) dmu_objset_evict_dbufs(zsb->z_os); + dmu_objset_evict_dbufs(zsb->z_os); return (0); } diff --git a/module/zfs/zil.c b/module/zfs/zil.c index c1796937b568..d59c92c093f4 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -257,7 +257,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, } } - VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); + VERIFY(arc_buf_remove_ref(abuf, &abuf)); } return (error); @@ -356,7 +356,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, break; error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end); - if (error) + if (error != 0) break; for (lrp = lrbuf; lrp < end; lrp += reclen) { @@ -492,7 +492,7 @@ zilog_dirty(zilog_t *zilog, uint64_t txg) if (dsl_dataset_is_snapshot(ds)) panic("dirtying snapshot!"); - if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg) == 0) { + if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg)) { /* up the hold count until we can be written out */ dmu_buf_add_ref(ds->ds_dbuf, zilog); } @@ -658,8 +658,8 @@ zil_claim(const char *osname, void *txarg) objset_t *os; int error; - error = dmu_objset_hold(osname, FTAG, &os); - if (error) { + error = dmu_objset_own(osname, DMU_OST_ANY, B_FALSE, FTAG, &os); + if (error != 0) { cmn_err(CE_WARN, "can't open objset for %s", osname); return (0); } @@ -672,7 +672,7 @@ zil_claim(const char *osname, void *txarg) zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log); BP_ZERO(&zh->zh_log); dsl_dataset_dirty(dmu_objset_ds(os), tx); - dmu_objset_rele(os, FTAG); + dmu_objset_disown(os, FTAG); return (0); } @@ -697,7 +697,7 @@ zil_claim(const char *osname, void *txarg) } ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); - dmu_objset_rele(os, FTAG); + dmu_objset_disown(os, FTAG); return (0); } @@ -717,7 +717,7 @@ zil_check_log_chain(const char *osname, void *tx) ASSERT(tx == NULL); error = dmu_objset_hold(osname, FTAG, &os); - if (error) { + if (error != 0) { cmn_err(CE_WARN, "can't open objset for %s", osname); return (0); } @@ -1014,7 +1014,8 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) BP_ZERO(bp); use_slog = USE_SLOG(zilog); - error = zio_alloc_zil(spa, txg, bp, zil_blksz, USE_SLOG(zilog)); + error = zio_alloc_zil(spa, txg, bp, zil_blksz, + USE_SLOG(zilog)); if (use_slog) { ZIL_STAT_BUMP(zil_itx_metaslab_slog_count); @@ -1025,7 +1026,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) ZIL_STAT_BUMP(zil_itx_metaslab_normal_count); ZIL_STAT_INCR(zil_itx_metaslab_normal_bytes, lwb->lwb_nused); } - if (!error) { + if (error == 0) { ASSERT3U(bp->blk_birth, ==, txg); bp->blk_cksum = lwb->lwb_blk.blk_cksum; bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++; @@ -1145,7 +1146,7 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb) 
txg_wait_synced(zilog->zl_dmu_pool, txg); return (lwb); } - if (error) { + if (error != 0) { ASSERT(error == ENOENT || error == EEXIST || error == EALREADY); return (lwb); @@ -1807,6 +1808,9 @@ zil_free(zilog_t *zilog) zilog->zl_stop_sync = 1; + ASSERT0(zilog->zl_suspend); + ASSERT0(zilog->zl_suspending); + ASSERT(list_is_empty(&zilog->zl_lwb_list)); list_destroy(&zilog->zl_lwb_list); @@ -1905,32 +1909,100 @@ zil_close(zilog_t *zilog) mutex_exit(&zilog->zl_lock); } +static char *suspend_tag = "zil suspending"; + /* * Suspend an intent log. While in suspended mode, we still honor * synchronous semantics, but we rely on txg_wait_synced() to do it. - * We suspend the log briefly when taking a snapshot so that the snapshot - * contains all the data it's supposed to, and has an empty intent log. + * On old version pools, we suspend the log briefly when taking a + * snapshot so that it will have an empty intent log. + * + * Long holds are not really intended to be used the way we do here -- + * held for such a short time. A concurrent caller of dsl_dataset_long_held() + * could fail. Therefore we take pains to only put a long hold if it is + * actually necessary. Fortunately, it will only be necessary if the + * objset is currently mounted (or the ZVOL equivalent). In that case it + * will already have a long hold, so we are not really making things any worse. + * + * Ideally, we would locate the existing long-holder (i.e. the zfsvfs_t or + * zvol_state_t), and use their mechanism to prevent their hold from being + * dropped (e.g. VFS_HOLD()). However, that would be even more pain for + * very little gain. + * + * if cookiep == NULL, this does both the suspend & resume. + * Otherwise, it returns with the dataset "long held", and the cookie + * should be passed into zil_resume(). */ int -zil_suspend(zilog_t *zilog) +zil_suspend(const char *osname, void **cookiep) { - const zil_header_t *zh = zilog->zl_header; + objset_t *os; + zilog_t *zilog; + const zil_header_t *zh; + int error; + + error = dmu_objset_hold(osname, suspend_tag, &os); + if (error != 0) + return (error); + zilog = dmu_objset_zil(os); mutex_enter(&zilog->zl_lock); + zh = zilog->zl_header; + if (zh->zh_flags & ZIL_REPLAY_NEEDED) { /* unplayed log */ mutex_exit(&zilog->zl_lock); + dmu_objset_rele(os, suspend_tag); return (EBUSY); } - if (zilog->zl_suspend++ != 0) { + + /* + * Don't put a long hold in the cases where we can avoid it. This + * is when there is no cookie so we are doing a suspend & resume + * (i.e. called from zil_vdev_offline()), and there's nothing to do + * for the suspend because it's already suspended, or there's no ZIL. + */ + if (cookiep == NULL && !zilog->zl_suspending && + (zilog->zl_suspend > 0 || BP_IS_HOLE(&zh->zh_log))) { + mutex_exit(&zilog->zl_lock); + dmu_objset_rele(os, suspend_tag); + return (0); + } + + dsl_dataset_long_hold(dmu_objset_ds(os), suspend_tag); + dsl_pool_rele(dmu_objset_pool(os), suspend_tag); + + zilog->zl_suspend++; + + if (zilog->zl_suspend > 1) { /* - * Someone else already began a suspend. + * Someone else is already suspending it. * Just wait for them to finish. */ + while (zilog->zl_suspending) cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock); mutex_exit(&zilog->zl_lock); + + if (cookiep == NULL) + zil_resume(os); + else + *cookiep = os; + return (0); + } + + /* + * If there is no pointer to an on-disk block, this ZIL must not + * be active (e.g. filesystem not mounted), so there's nothing + * to clean up. 
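[Illustrative aside, not a hunk of this patch: the reworked zil_suspend()/zil_resume() interface takes a dataset name and an optional cookie instead of a zilog_t. A minimal usage sketch; the dataset name is an example.]

    void *cookie;
    int error;

    /* Suspend; on success the dataset is long-held via the cookie. */
    error = zil_suspend("tank/fs", &cookie);
    if (error == 0) {
        /* ... ZIL is quiesced: take the snapshot, etc. ... */
        zil_resume(cookie);
    }

    /* One-shot suspend & resume, as zil_vdev_offline() does: */
    error = zil_suspend("tank/fs", NULL);
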
+ */ + if (BP_IS_HOLE(&zh->zh_log)) { + ASSERT(cookiep != NULL); /* fast path already handled */ + + *cookiep = os; + mutex_exit(&zilog->zl_lock); return (0); } + zilog->zl_suspending = B_TRUE; mutex_exit(&zilog->zl_lock); @@ -1943,16 +2015,25 @@ zil_suspend(zilog_t *zilog) cv_broadcast(&zilog->zl_cv_suspend); mutex_exit(&zilog->zl_lock); + if (cookiep == NULL) + zil_resume(os); + else + *cookiep = os; return (0); } void -zil_resume(zilog_t *zilog) +zil_resume(void *cookie) { + objset_t *os = cookie; + zilog_t *zilog = dmu_objset_zil(os); + mutex_enter(&zilog->zl_lock); ASSERT(zilog->zl_suspend != 0); zilog->zl_suspend--; mutex_exit(&zilog->zl_lock); + dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag); + dsl_dataset_rele(dmu_objset_ds(os), suspend_tag); } typedef struct zil_replay_arg { @@ -2025,7 +2106,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg) if (txtype == TX_WRITE && reclen == sizeof (lr_write_t)) { error = zil_read_log_data(zilog, (lr_write_t *)lr, zr->zr_lr + reclen); - if (error) + if (error != 0) return (zil_replay_error(zilog, lr, error)); } @@ -2046,7 +2127,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg) * is updated if we are in replay mode. */ error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, zr->zr_byteswap); - if (error) { + if (error != 0) { /* * The DMU's dnode layer doesn't see removes until the txg * commits, so a subsequent claim can spuriously fail with @@ -2056,7 +2137,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg) */ txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0); error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, B_FALSE); - if (error) + if (error != 0) return (zil_replay_error(zilog, lr, error)); } return (0); @@ -2128,21 +2209,12 @@ zil_replaying(zilog_t *zilog, dmu_tx_t *tx) int zil_vdev_offline(const char *osname, void *arg) { - objset_t *os; - zilog_t *zilog; int error; - error = dmu_objset_hold(osname, FTAG, &os); - if (error) - return (error); - - zilog = dmu_objset_zil(os); - if (zil_suspend(zilog) != 0) - error = EEXIST; - else - zil_resume(zilog); - dmu_objset_rele(os, FTAG); - return (error); + error = zil_suspend(osname, NULL); + if (error != 0) + return (EEXIST); + return (0); } #if defined(_KERNEL) && defined(HAVE_SPL) diff --git a/module/zfs/zio.c b/module/zfs/zio.c index ccefaf8ac1cb..0e2b463ac9e2 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -767,6 +767,7 @@ zio_write_override(zio_t *zio, blkptr_t *bp, int copies) void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) { + metaslab_check_free(spa, bp); bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp); } @@ -785,6 +786,8 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, arc_freed(spa, bp); + metaslab_check_free(spa, bp); + zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp), NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags, NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PIPELINE); @@ -2060,7 +2063,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) bcmp(abuf->b_data, zio->io_orig_data, zio->io_orig_size) != 0) error = EEXIST; - VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); + VERIFY(arc_buf_remove_ref(abuf, &abuf)); } ddt_enter(ddt); @@ -2656,8 +2659,9 @@ zio_vdev_io_assess(zio_t *zio) * set vdev_cant_write so that we stop trying to allocate from it. 
*/ if (zio->io_error == ENXIO && zio->io_type == ZIO_TYPE_WRITE && - vd != NULL && !vd->vdev_ops->vdev_op_leaf) + vd != NULL && !vd->vdev_ops->vdev_op_leaf) { vd->vdev_cant_write = B_TRUE; + } if (zio->io_error) zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index b51615637244..f52d8bbc142a 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -315,6 +315,13 @@ zvol_set_volsize(const char *name, uint64_t volsize) uint64_t readonly; int error; + error = dsl_prop_get_integer(name, + zfs_prop_to_name(ZFS_PROP_READONLY), &readonly, NULL); + if (error != 0) + return (error); + if (readonly) + return (EROFS); + mutex_enter(&zvol_state_lock); zv = zvol_find_by_name(name); @@ -1459,8 +1466,7 @@ zvol_remove_minor(const char *name) } static int -zvol_create_minors_cb(spa_t *spa, uint64_t dsobj, - const char *dsname, void *arg) +zvol_create_minors_cb(const char *dsname, void *arg) { if (strchr(dsname, '/') == NULL) return 0; @@ -1474,7 +1480,7 @@ zvol_create_minors_cb(spa_t *spa, uint64_t dsobj, * for all available pools. */ int -zvol_create_minors(const char *pool) +zvol_create_minors(char *pool) { spa_t *spa = NULL; int error = 0; @@ -1484,13 +1490,12 @@ zvol_create_minors(const char *pool) mutex_enter(&zvol_state_lock); if (pool) { - error = dmu_objset_find_spa(NULL, pool, zvol_create_minors_cb, + error = dmu_objset_find(pool, zvol_create_minors_cb, NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); } else { mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) { - error = dmu_objset_find_spa(NULL, - spa_name(spa), zvol_create_minors_cb, NULL, + error = dmu_objset_find(spa_name(spa), zvol_create_minors_cb, NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); if (error) break; diff --git a/module/zpios/pios.c b/module/zpios/pios.c index 53cc77bd9762..be56b771d34c 100644 --- a/module/zpios/pios.c +++ b/module/zpios/pios.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include "zpios-internal.h" @@ -224,9 +225,9 @@ zpios_dmu_setup(run_args_t *run_args) run_args->os = os; out_destroy: if (rc) { - rc2 = dmu_objset_destroy(name, B_FALSE); + rc2 = dsl_destroy_head(name); if (rc2) - zpios_print(run_args->file, "Error dmu_objset_destroy" + zpios_print(run_args->file, "Error dsl_destroy_head" "(%s, ...) failed: %d\n", name, rc2); } out: @@ -395,9 +396,9 @@ zpios_remove_objset(run_args_t *run_args) dmu_objset_disown(run_args->os, zpios_tag); if (run_args->flags & DMU_REMOVE) { - rc = dmu_objset_destroy(name, B_FALSE); + rc = dsl_destroy_head(name); if (rc) - zpios_print(run_args->file, "Error dmu_objset_destroy" + zpios_print(run_args->file, "Error dsl_destroy_head" "(%s, ...) failed: %d\n", name, rc); }
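[Illustrative aside, not a hunk of this patch: the recurring shape this change applies throughout zfs_ioctl.c, replacing spa_open() plus manual dp_config_rwlock handling with the paired dsl_pool_hold()/dsl_dataset_hold() interface. A standalone sketch of that pattern:]

    static int
    example_with_dataset(const char *name)    /* hypothetical helper */
    {
        dsl_pool_t *dp;
        dsl_dataset_t *ds;
        int error;

        error = dsl_pool_hold(name, FTAG, &dp);
        if (error != 0)
            return (error);

        error = dsl_dataset_hold(dp, name, FTAG, &ds);
        if (error != 0) {
            dsl_pool_rele(dp, FTAG);
            return (error);
        }

        /* ... inspect the dataset under the pool config lock ... */

        dsl_dataset_rele(ds, FTAG);
        dsl_pool_rele(dp, FTAG);
        return (0);
    }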