diff --git a/.gitignore b/.gitignore index 549fa59f3822..ae9e22dfa7bb 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,14 @@ cscope.* *.log venv +# +# Module leftovers +# +/module/avl/zavl.mod +/module/icp/icp.mod +/module/lua/zlua.mod +/module/nvpair/znvpair.mod +/module/spl/spl.mod +/module/unicode/zunicode.mod +/module/zcommon/zcommon.mod +/module/zfs/zfs.mod diff --git a/META b/META index d9285b7732e4..960a2b73ab30 100644 --- a/META +++ b/META @@ -1,10 +1,10 @@ Meta: 1 Name: zfs Branch: 1.0 -Version: 0.8.1 +Version: 0.8.2 Release: 1 Release-Tags: relext License: CDDL Author: OpenZFS on Linux -Linux-Maximum: 5.1 +Linux-Maximum: 5.3 Linux-Minimum: 2.6.32 diff --git a/Makefile.am b/Makefile.am index 1ec2514922a9..9afe22954101 100644 --- a/Makefile.am +++ b/Makefile.am @@ -52,7 +52,8 @@ distclean-local:: -type f -print | xargs $(RM) all-local: - -${top_srcdir}/scripts/zfs-tests.sh -c + -[ -x ${top_builddir}/scripts/zfs-tests.sh ] && \ + ${top_builddir}/scripts/zfs-tests.sh -c dist-hook: gitrev cp ${top_srcdir}/include/zfs_gitrev.h $(distdir)/include; \ diff --git a/cmd/Makefile.am b/cmd/Makefile.am index 0d990789b0c6..88609e455f2b 100644 --- a/cmd/Makefile.am +++ b/cmd/Makefile.am @@ -5,4 +5,4 @@ if USING_PYTHON SUBDIRS += arcstat arc_summary dbufstat endif -SUBDIRS += mount_zfs zed zvol_id +SUBDIRS += mount_zfs zed zvol_id zvol_wait diff --git a/cmd/zed/Makefile.am b/cmd/zed/Makefile.am index 9c11315f2a58..fb479f9b5c79 100644 --- a/cmd/zed/Makefile.am +++ b/cmd/zed/Makefile.am @@ -1,12 +1,11 @@ +SUBDIRS = zed.d + include $(top_srcdir)/config/Rules.am DEFAULT_INCLUDES += \ -I$(top_srcdir)/include \ -I$(top_srcdir)/lib/libspl/include -EXTRA_DIST = zed.d/README \ - zed.d/history_event-zfs-list-cacher.sh.in - sbin_PROGRAMS = zed ZED_SRC = \ @@ -47,55 +46,3 @@ zed_LDADD = \ zed_LDADD += -lrt zed_LDFLAGS = -pthread - -zedconfdir = $(sysconfdir)/zfs/zed.d - -dist_zedconf_DATA = \ - zed.d/zed-functions.sh \ - zed.d/zed.rc - -zedexecdir = $(zfsexecdir)/zed.d - -dist_zedexec_SCRIPTS = \ - zed.d/all-debug.sh \ - zed.d/all-syslog.sh \ - zed.d/data-notify.sh \ - zed.d/generic-notify.sh \ - zed.d/resilver_finish-notify.sh \ - zed.d/scrub_finish-notify.sh \ - zed.d/statechange-led.sh \ - zed.d/statechange-notify.sh \ - zed.d/vdev_clear-led.sh \ - zed.d/vdev_attach-led.sh \ - zed.d/pool_import-led.sh \ - zed.d/resilver_finish-start-scrub.sh - -nodist_zedexec_SCRIPTS = zed.d/history_event-zfs-list-cacher.sh - -$(nodist_zedexec_SCRIPTS): %: %.in - -$(SED) -e 's,@bindir\@,$(bindir),g' \ - -e 's,@runstatedir\@,$(runstatedir),g' \ - -e 's,@sbindir\@,$(sbindir),g' \ - -e 's,@sysconfdir\@,$(sysconfdir),g' \ - $< >'$@' - -zedconfdefaults = \ - all-syslog.sh \ - data-notify.sh \ - resilver_finish-notify.sh \ - scrub_finish-notify.sh \ - statechange-led.sh \ - statechange-notify.sh \ - vdev_clear-led.sh \ - vdev_attach-led.sh \ - pool_import-led.sh \ - resilver_finish-start-scrub.sh - -install-data-hook: - $(MKDIR_P) "$(DESTDIR)$(zedconfdir)" - for f in $(zedconfdefaults); do \ - test -f "$(DESTDIR)$(zedconfdir)/$${f}" -o \ - -L "$(DESTDIR)$(zedconfdir)/$${f}" || \ - ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \ - done - chmod 0600 "$(DESTDIR)$(zedconfdir)/zed.rc" diff --git a/cmd/zed/agents/zfs_agents.c b/cmd/zed/agents/zfs_agents.c index 6d392604bceb..006e0ab99f47 100644 --- a/cmd/zed/agents/zfs_agents.c +++ b/cmd/zed/agents/zfs_agents.c @@ -116,7 +116,8 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg) /* * On a devid match, grab the vdev guid and expansion time, if any. */ - if ((nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) && + if (gsp->gs_devid != NULL && + (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) && (strcmp(gsp->gs_devid, path) == 0)) { (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &gsp->gs_vdev_guid); diff --git a/cmd/zed/zed.d/Makefile.am b/cmd/zed/zed.d/Makefile.am new file mode 100644 index 000000000000..716db2b2f215 --- /dev/null +++ b/cmd/zed/zed.d/Makefile.am @@ -0,0 +1,57 @@ +include $(top_srcdir)/config/Rules.am + +EXTRA_DIST = \ + README \ + history_event-zfs-list-cacher.sh.in + +zedconfdir = $(sysconfdir)/zfs/zed.d + +dist_zedconf_DATA = \ + zed-functions.sh \ + zed.rc + +zedexecdir = $(zfsexecdir)/zed.d + +dist_zedexec_SCRIPTS = \ + all-debug.sh \ + all-syslog.sh \ + data-notify.sh \ + generic-notify.sh \ + resilver_finish-notify.sh \ + scrub_finish-notify.sh \ + statechange-led.sh \ + statechange-notify.sh \ + vdev_clear-led.sh \ + vdev_attach-led.sh \ + pool_import-led.sh \ + resilver_finish-start-scrub.sh + +nodist_zedexec_SCRIPTS = history_event-zfs-list-cacher.sh + +$(nodist_zedexec_SCRIPTS): %: %.in + -$(SED) -e 's,@bindir\@,$(bindir),g' \ + -e 's,@runstatedir\@,$(runstatedir),g' \ + -e 's,@sbindir\@,$(sbindir),g' \ + -e 's,@sysconfdir\@,$(sysconfdir),g' \ + $< >'$@' + +zedconfdefaults = \ + all-syslog.sh \ + data-notify.sh \ + resilver_finish-notify.sh \ + scrub_finish-notify.sh \ + statechange-led.sh \ + statechange-notify.sh \ + vdev_clear-led.sh \ + vdev_attach-led.sh \ + pool_import-led.sh \ + resilver_finish-start-scrub.sh + +install-data-hook: + $(MKDIR_P) "$(DESTDIR)$(zedconfdir)" + for f in $(zedconfdefaults); do \ + test -f "$(DESTDIR)$(zedconfdir)/$${f}" -o \ + -L "$(DESTDIR)$(zedconfdir)/$${f}" || \ + ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \ + done + chmod 0600 "$(DESTDIR)$(zedconfdir)/zed.rc" diff --git a/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in b/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in index c1513cf3a01f..6d0f44ab3260 100755 --- a/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in +++ b/cmd/zed/zed.d/history_event-zfs-list-cacher.sh.in @@ -47,7 +47,7 @@ case "${ZEVENT_HISTORY_INTERNAL_NAME}" in # Only act if one of the tracked properties is altered. case "${ZEVENT_HISTORY_INTERNAL_STR%%=*}" in canmount|mountpoint|atime|relatime|devices|exec| \ - readonly|setuid|nbmand) ;; + readonly|setuid|nbmand|encroot|keylocation) ;; *) exit 0 ;; esac ;; @@ -62,7 +62,7 @@ zed_lock zfs-list trap abort_alter EXIT PROPS="name,mountpoint,canmount,atime,relatime,devices,exec,readonly" -PROPS="${PROPS},setuid,nbmand" +PROPS="${PROPS},setuid,nbmand,encroot,keylocation" "${ZFS}" list -H -t filesystem -o $PROPS -r "${ZEVENT_POOL}" > "${FSLIST_TMP}" diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index d75f089acd1f..074216055227 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -6446,8 +6446,25 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, return (1); } - if (zfs_mount(zhp, options, flags) != 0) + if (zfs_mount(zhp, options, flags) != 0) { + /* + * Check if a mount sneaked in after we checked + */ + if (!explicit && + libzfs_errno(g_zfs) == EZFS_MOUNTFAILED) { + usleep(10 * MILLISEC); + libzfs_mnttab_cache(g_zfs, B_FALSE); + + if (zfs_is_mounted(zhp, NULL)) { + (void) fprintf(stderr, gettext( + "Ignoring previous 'already " + "mounted' error for '%s'\n"), + zfs_get_name(zhp)); + return (0); + } + } return (1); + } break; } @@ -6733,8 +6750,8 @@ unshare_unmount_compare(const void *larg, const void *rarg, void *unused) /* * Convenience routine used by zfs_do_umount() and manual_unmount(). Given an - * absolute path, find the entry /proc/self/mounts, verify that its a - * ZFS filesystems, and unmount it appropriately. + * absolute path, find the entry /proc/self/mounts, verify that it's a + * ZFS filesystem, and unmount it appropriately. */ static int unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index b30edb1ccb43..4fd5f025b717 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -785,7 +785,7 @@ add_prop_list_default(const char *propname, char *propval, nvlist_t **props, * -P Display full path for vdev name. * * Adds the given vdevs to 'pool'. As with create, the bulk of this work is - * handled by get_vdev_spec(), which constructs the nvlist needed to pass to + * handled by make_root_vdev(), which constructs the nvlist needed to pass to * libzfs. */ int @@ -883,7 +883,7 @@ zpool_do_add(int argc, char **argv) } } - /* pass off to get_vdev_spec for processing */ + /* pass off to make_root_vdev for processing */ nvroot = make_root_vdev(zhp, props, force, !force, B_FALSE, dryrun, argc, argv); if (nvroot == NULL) { @@ -1232,9 +1232,9 @@ zpool_do_labelclear(int argc, char **argv) * -O Set fsproperty=value in the pool's root file system * * Creates the named pool according to the given vdev specification. The - * bulk of the vdev processing is done in get_vdev_spec() in zpool_vdev.c. Once - * we get the nvlist back from get_vdev_spec(), we either print out the contents - * (if '-n' was specified), or pass it to libzfs to do the creation. + * bulk of the vdev processing is done in make_root_vdev() in zpool_vdev.c. + * Once we get the nvlist back from make_root_vdev(), we either print out the + * contents (if '-n' was specified), or pass it to libzfs to do the creation. */ int zpool_do_create(int argc, char **argv) @@ -1388,7 +1388,7 @@ zpool_do_create(int argc, char **argv) goto errout; } - /* pass off to get_vdev_spec for bulk processing */ + /* pass off to make_root_vdev for bulk processing */ nvroot = make_root_vdev(NULL, props, force, !force, B_FALSE, dryrun, argc - 1, argv + 1); if (nvroot == NULL) @@ -6111,9 +6111,8 @@ zpool_do_detach(int argc, char **argv) int ret; /* check options */ - while ((c = getopt(argc, argv, "f")) != -1) { + while ((c = getopt(argc, argv, "")) != -1) { switch (c) { - case 'f': case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -6342,12 +6341,11 @@ zpool_do_online(int argc, char **argv) int flags = 0; /* check options */ - while ((c = getopt(argc, argv, "et")) != -1) { + while ((c = getopt(argc, argv, "e")) != -1) { switch (c) { case 'e': flags |= ZFS_ONLINE_EXPAND; break; - case 't': case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index 7ea9d742006d..52c696816f73 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -433,6 +433,7 @@ check_disk(const char *path, blkid_cache cache, int force, char *value = blkid_get_tag_value(cache, "TYPE", path); (void) fprintf(stderr, gettext("%s is in use and contains " "a %s filesystem.\n"), path, value ? value : "unknown"); + free(value); return (-1); } diff --git a/cmd/zstreamdump/zstreamdump.c b/cmd/zstreamdump/zstreamdump.c index a162eceda58f..a65b4cef3d31 100644 --- a/cmd/zstreamdump/zstreamdump.c +++ b/cmd/zstreamdump/zstreamdump.c @@ -53,7 +53,6 @@ */ #define DUMP_GROUPING 4 -uint64_t total_write_size = 0; uint64_t total_stream_len = 0; FILE *send_stream = 0; boolean_t do_byteswap = B_FALSE; @@ -219,6 +218,9 @@ main(int argc, char *argv[]) { char *buf = safe_malloc(SPA_MAXBLOCKSIZE); uint64_t drr_record_count[DRR_NUMTYPES] = { 0 }; + uint64_t total_payload_size = 0; + uint64_t total_overhead_size = 0; + uint64_t drr_byte_count[DRR_NUMTYPES] = { 0 }; char salt[ZIO_DATA_SALT_LEN * 2 + 1]; char iv[ZIO_DATA_IV_LEN * 2 + 1]; char mac[ZIO_DATA_MAC_LEN * 2 + 1]; @@ -336,7 +338,9 @@ main(int argc, char *argv[]) } drr_record_count[drr->drr_type]++; + total_overhead_size += sizeof (*drr); total_records++; + payload_size = 0; switch (drr->drr_type) { case DRR_BEGIN: @@ -390,6 +394,7 @@ main(int argc, char *argv[]) nvlist_print(stdout, nv); nvlist_free(nv); } + payload_size = sz; } break; @@ -554,7 +559,6 @@ main(int argc, char *argv[]) if (dump) { print_block(buf, payload_size); } - total_write_size += payload_size; break; case DRR_WRITE_BYREF: @@ -683,6 +687,7 @@ main(int argc, char *argv[]) print_block(buf, P2ROUNDUP(drrwe->drr_psize, 8)); } + payload_size = P2ROUNDUP(drrwe->drr_psize, 8); break; case DRR_OBJECT_RANGE: if (do_byteswap) { @@ -723,6 +728,8 @@ main(int argc, char *argv[]) (longlong_t)drrc->drr_checksum.zc_word[3]); } pcksum = zc; + drr_byte_count[drr->drr_type] += payload_size; + total_payload_size += payload_size; } free(buf); fletcher_4_fini(); @@ -730,28 +737,40 @@ main(int argc, char *argv[]) /* Print final summary */ (void) printf("SUMMARY:\n"); - (void) printf("\tTotal DRR_BEGIN records = %lld\n", - (u_longlong_t)drr_record_count[DRR_BEGIN]); - (void) printf("\tTotal DRR_END records = %lld\n", - (u_longlong_t)drr_record_count[DRR_END]); - (void) printf("\tTotal DRR_OBJECT records = %lld\n", - (u_longlong_t)drr_record_count[DRR_OBJECT]); - (void) printf("\tTotal DRR_FREEOBJECTS records = %lld\n", - (u_longlong_t)drr_record_count[DRR_FREEOBJECTS]); - (void) printf("\tTotal DRR_WRITE records = %lld\n", - (u_longlong_t)drr_record_count[DRR_WRITE]); - (void) printf("\tTotal DRR_WRITE_BYREF records = %lld\n", - (u_longlong_t)drr_record_count[DRR_WRITE_BYREF]); - (void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld\n", - (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED]); - (void) printf("\tTotal DRR_FREE records = %lld\n", - (u_longlong_t)drr_record_count[DRR_FREE]); - (void) printf("\tTotal DRR_SPILL records = %lld\n", - (u_longlong_t)drr_record_count[DRR_SPILL]); + (void) printf("\tTotal DRR_BEGIN records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_BEGIN], + (u_longlong_t)drr_byte_count[DRR_BEGIN]); + (void) printf("\tTotal DRR_END records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_END], + (u_longlong_t)drr_byte_count[DRR_END]); + (void) printf("\tTotal DRR_OBJECT records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_OBJECT], + (u_longlong_t)drr_byte_count[DRR_OBJECT]); + (void) printf("\tTotal DRR_FREEOBJECTS records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_FREEOBJECTS], + (u_longlong_t)drr_byte_count[DRR_FREEOBJECTS]); + (void) printf("\tTotal DRR_WRITE records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_WRITE], + (u_longlong_t)drr_byte_count[DRR_WRITE]); + (void) printf("\tTotal DRR_WRITE_BYREF records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_WRITE_BYREF], + (u_longlong_t)drr_byte_count[DRR_WRITE_BYREF]); + (void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld (%llu " + "bytes)\n", (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED], + (u_longlong_t)drr_byte_count[DRR_WRITE_EMBEDDED]); + (void) printf("\tTotal DRR_FREE records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_FREE], + (u_longlong_t)drr_byte_count[DRR_FREE]); + (void) printf("\tTotal DRR_SPILL records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_SPILL], + (u_longlong_t)drr_byte_count[DRR_SPILL]); (void) printf("\tTotal records = %lld\n", (u_longlong_t)total_records); - (void) printf("\tTotal write size = %lld (0x%llx)\n", - (u_longlong_t)total_write_size, (u_longlong_t)total_write_size); + (void) printf("\tTotal payload size = %lld (0x%llx)\n", + (u_longlong_t)total_payload_size, (u_longlong_t)total_payload_size); + (void) printf("\tTotal header overhead = %lld (0x%llx)\n", + (u_longlong_t)total_overhead_size, + (u_longlong_t)total_overhead_size); (void) printf("\tTotal stream length = %lld (0x%llx)\n", (u_longlong_t)total_stream_len, (u_longlong_t)total_stream_len); return (0); diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 9c2cf9501831..3bf840d88ed6 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -2745,8 +2745,24 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); + + /* + * We open a reference to the spa and then we try to export it + * expecting one of the following errors: + * + * EBUSY + * Because of the reference we just opened. + * + * ZFS_ERR_EXPORT_IN_PROGRESS + * For the case that there is another ztest thread doing + * an export concurrently. + */ VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); - VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); + int error = spa_destroy(zo->zo_pool); + if (error != EBUSY && error != ZFS_ERR_EXPORT_IN_PROGRESS) { + fatal(0, "spa_destroy(%s) returned unexpected value %d", + spa->spa_name, error); + } spa_close(spa, FTAG); (void) pthread_rwlock_unlock(&ztest_name_lock); diff --git a/cmd/zvol_wait/Makefile.am b/cmd/zvol_wait/Makefile.am new file mode 100644 index 000000000000..564031c9799d --- /dev/null +++ b/cmd/zvol_wait/Makefile.am @@ -0,0 +1 @@ +dist_bin_SCRIPTS = zvol_wait diff --git a/cmd/zvol_wait/zvol_wait b/cmd/zvol_wait/zvol_wait new file mode 100755 index 000000000000..e5df82dd376a --- /dev/null +++ b/cmd/zvol_wait/zvol_wait @@ -0,0 +1,112 @@ +#!/bin/sh + +count_zvols() { + if [ -z "$zvols" ]; then + echo 0 + else + echo "$zvols" | wc -l + fi +} + +filter_out_zvols_with_links() { + while read -r zvol; do + if [ ! -L "/dev/zvol/$zvol" ]; then + echo "$zvol" + fi + done +} + +filter_out_deleted_zvols() { + while read -r zvol; do + if zfs list "$zvol" >/dev/null 2>&1; then + echo "$zvol" + fi + done +} + +list_zvols() { + zfs list -t volume -H -o name,volmode,receive_resume_token | + while read -r zvol_line; do + name=$(echo "$zvol_line" | awk '{print $1}') + volmode=$(echo "$zvol_line" | awk '{print $2}') + token=$(echo "$zvol_line" | awk '{print $3}') + # + # /dev links are not created for zvols with volmode = "none". + # + [ "$volmode" = "none" ] && continue + # + # We also also ignore partially received zvols if it is + # not an incremental receive, as those won't even have a block + # device minor node created yet. + # + if [ "$token" != "-" ]; then + # + # Incremental receives create an invisible clone that + # is not automatically displayed by zfs list. + # + if ! zfs list "$name/%recv" >/dev/null 2>&1; then + continue + fi + fi + echo "$name" + done +} + +zvols=$(list_zvols) +zvols_count=$(count_zvols) +if [ "$zvols_count" -eq 0 ]; then + echo "No zvols found, nothing to do." + exit 0 +fi + +echo "Testing $zvols_count zvol links" + +outer_loop=0 +while [ "$outer_loop" -lt 20 ]; do + outer_loop=$((outer_loop + 1)) + + old_zvols_count=$(count_zvols) + + inner_loop=0 + while [ "$inner_loop" -lt 30 ]; do + inner_loop=$((inner_loop + 1)) + + zvols="$(echo "$zvols" | filter_out_zvols_with_links)" + + zvols_count=$(count_zvols) + if [ "$zvols_count" -eq 0 ]; then + echo "All zvol links are now present." + exit 0 + fi + sleep 1 + done + + echo "Still waiting on $zvols_count zvol links ..." + # + # Although zvols should normally not be deleted at boot time, + # if that is the case then their links will be missing and + # we would stall. + # + if [ "$old_zvols_count" -eq "$zvols_count" ]; then + echo "No progress since last loop." + echo "Checking if any zvols were deleted." + + zvols=$(echo "$zvols" | filter_out_deleted_zvols) + zvols_count=$(count_zvols) + + if [ "$old_zvols_count" -ne "$zvols_count" ]; then + echo "$((old_zvols_count - zvols_count)) zvol(s) deleted." + fi + + if [ "$zvols_count" -ne 0 ]; then + echo "Remaining zvols:" + echo "$zvols" + else + echo "All zvol links are now present." + exit 0 + fi + fi +done + +echo "Timed out waiting on zvol links" +exit 1 diff --git a/config/always-python.m4 b/config/always-python.m4 index 7cfefd9ebcae..c1c07597e688 100644 --- a/config/always-python.m4 +++ b/config/always-python.m4 @@ -1,47 +1,3 @@ -dnl # -dnl # ZFS_AC_PYTHON_VERSION(version, [action-if-true], [action-if-false]) -dnl # -dnl # Verify Python version -dnl # -AC_DEFUN([ZFS_AC_PYTHON_VERSION], [ - ver_check=`$PYTHON -c "import sys; print (sys.version.split()[[0]] $1)"` - AS_IF([test "$ver_check" = "True"], [ - m4_ifvaln([$2], [$2]) - ], [ - m4_ifvaln([$3], [$3]) - ]) -]) - -dnl # -dnl # ZFS_AC_PYTHON_VERSION_IS_2 -dnl # ZFS_AC_PYTHON_VERSION_IS_3 -dnl # -dnl # Tests if the $PYTHON_VERSION matches 2.x or 3.x. -dnl # -AC_DEFUN([ZFS_AC_PYTHON_VERSION_IS_2], - [test "${PYTHON_VERSION%%\.*}" = "2"]) -AC_DEFUN([ZFS_AC_PYTHON_VERSION_IS_3], - [test "${PYTHON_VERSION%%\.*}" = "3"]) - -dnl # -dnl # ZFS_AC_PYTHON_MODULE(module_name, [action-if-true], [action-if-false]) -dnl # -dnl # Checks for Python module. Freely inspired by AX_PYTHON_MODULE -dnl # https://www.gnu.org/software/autoconf-archive/ax_python_module.html -dnl # Required by ZFS_AC_CONFIG_ALWAYS_PYZFS. -dnl # -AC_DEFUN([ZFS_AC_PYTHON_MODULE], [ - PYTHON_NAME=`basename $PYTHON` - AC_MSG_CHECKING([for $PYTHON_NAME module: $1]) - AS_IF([$PYTHON -c "import $1" 2>/dev/null], [ - AC_MSG_RESULT(yes) - m4_ifvaln([$2], [$2]) - ], [ - AC_MSG_RESULT(no) - m4_ifvaln([$3], [$3]) - ]) -]) - dnl # dnl # The majority of the python scripts are written to be compatible dnl # with Python 2.6 and Python 3.4. Therefore, they may be installed @@ -66,35 +22,38 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYTHON], [ [AC_MSG_ERROR([Unknown --with-python value '$with_python'])] ) - AS_IF([test $PYTHON != :], [ - AS_IF([$PYTHON --version >/dev/null 2>&1], - [AM_PATH_PYTHON([2.6], [], [:])], - [AC_MSG_ERROR([Cannot find $PYTHON in your system path])] - ) - ]) - AM_CONDITIONAL([USING_PYTHON], [test "$PYTHON" != :]) - AM_CONDITIONAL([USING_PYTHON_2], [ZFS_AC_PYTHON_VERSION_IS_2]) - AM_CONDITIONAL([USING_PYTHON_3], [ZFS_AC_PYTHON_VERSION_IS_3]) - dnl # dnl # Minimum supported Python versions for utilities: - dnl # Python 2.6.x, or Python 3.4.x + dnl # Python 2.6 or Python 3.4 dnl # - AS_IF([ZFS_AC_PYTHON_VERSION_IS_2], [ - ZFS_AC_PYTHON_VERSION([>= '2.6'], [ true ], - [AC_MSG_ERROR("Python >= 2.6.x is not available")]) + AM_PATH_PYTHON([], [], [:]) + AS_IF([test -z "$PYTHON_VERSION"], [ + PYTHON_VERSION=$(basename $PYTHON | tr -cd 0-9.) ]) + PYTHON_MINOR=${PYTHON_VERSION#*\.} - AS_IF([ZFS_AC_PYTHON_VERSION_IS_3], [ - ZFS_AC_PYTHON_VERSION([>= '3.4'], [ true ], - [AC_MSG_ERROR("Python >= 3.4.x is not available")]) - ]) + AS_CASE([$PYTHON_VERSION], + [2.*], [ + AS_IF([test $PYTHON_MINOR -lt 6], + [AC_MSG_ERROR("Python >= 2.6 is required")]) + ], + [3.*], [ + AS_IF([test $PYTHON_MINOR -lt 4], + [AC_MSG_ERROR("Python >= 3.4 is required")]) + ], + [:|2|3], [], + [PYTHON_VERSION=3] + ) + + AM_CONDITIONAL([USING_PYTHON], [test "$PYTHON" != :]) + AM_CONDITIONAL([USING_PYTHON_2], [test "x${PYTHON_VERSION%%\.*}" = x2]) + AM_CONDITIONAL([USING_PYTHON_3], [test "x${PYTHON_VERSION%%\.*}" = x3]) dnl # dnl # Request that packages be built for a specific Python version. dnl # - AS_IF([test $with_python != check], [ - PYTHON_PKG_VERSION=`echo ${PYTHON} | tr -d 'a-zA-Z.'` + AS_IF([test "x$with_python" != xcheck], [ + PYTHON_PKG_VERSION=$(echo $PYTHON_VERSION | tr -d .) DEFINE_PYTHON_PKG_VERSION='--define "__use_python_pkg_version '${PYTHON_PKG_VERSION}'"' DEFINE_PYTHON_VERSION='--define "__use_python '${PYTHON}'"' ], [ diff --git a/config/always-pyzfs.m4 b/config/always-pyzfs.m4 index 6f32e98feed2..f620a8f9a18b 100644 --- a/config/always-pyzfs.m4 +++ b/config/always-pyzfs.m4 @@ -1,5 +1,24 @@ dnl # -dnl # Determines if pyzfs can be built, requires Python 2.7 or latter. +dnl # ZFS_AC_PYTHON_MODULE(module_name, [action-if-true], [action-if-false]) +dnl # +dnl # Checks for Python module. Freely inspired by AX_PYTHON_MODULE +dnl # https://www.gnu.org/software/autoconf-archive/ax_python_module.html +dnl # Required by ZFS_AC_CONFIG_ALWAYS_PYZFS. +dnl # +AC_DEFUN([ZFS_AC_PYTHON_MODULE], [ + PYTHON_NAME=$(basename $PYTHON) + AC_MSG_CHECKING([for $PYTHON_NAME module: $1]) + AS_IF([$PYTHON -c "import $1" 2>/dev/null], [ + AC_MSG_RESULT(yes) + m4_ifvaln([$2], [$2]) + ], [ + AC_MSG_RESULT(no) + m4_ifvaln([$3], [$3]) + ]) +]) + +dnl # +dnl # Determines if pyzfs can be built, requires Python 2.7 or later. dnl # AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ AC_ARG_ENABLE([pyzfs], @@ -18,7 +37,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ DEFINE_PYZFS='--without pyzfs' ]) ], [ - AS_IF([test $PYTHON != :], [ + AS_IF([test "$PYTHON" != :], [ DEFINE_PYZFS='' ], [ enable_pyzfs=no @@ -31,20 +50,16 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ dnl # Require python-devel libraries dnl # AS_IF([test "x$enable_pyzfs" = xcheck -o "x$enable_pyzfs" = xyes], [ - AS_IF([ZFS_AC_PYTHON_VERSION_IS_2], [ - PYTHON_REQUIRED_VERSION=">= '2.7.0'" - ], [ - AS_IF([ZFS_AC_PYTHON_VERSION_IS_3], [ - PYTHON_REQUIRED_VERSION=">= '3.4.0'" - ], [ - AC_MSG_ERROR("Python $PYTHON_VERSION unknown") - ]) - ]) + AS_CASE([$PYTHON_VERSION], + [3.*], [PYTHON_REQUIRED_VERSION=">= '3.4.0'"], + [2.*], [PYTHON_REQUIRED_VERSION=">= '2.7.0'"], + [AC_MSG_ERROR("Python $PYTHON_VERSION unknown")] + ) AX_PYTHON_DEVEL([$PYTHON_REQUIRED_VERSION], [ AS_IF([test "x$enable_pyzfs" = xyes], [ AC_MSG_ERROR("Python $PYTHON_REQUIRED_VERSION development library is not installed") - ], [test ! "x$enable_pyzfs" = xno], [ + ], [test "x$enable_pyzfs" != xno], [ enable_pyzfs=no ]) ]) @@ -57,7 +72,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ ZFS_AC_PYTHON_MODULE([setuptools], [], [ AS_IF([test "x$enable_pyzfs" = xyes], [ AC_MSG_ERROR("Python $PYTHON_VERSION setuptools is not installed") - ], [test ! "x$enable_pyzfs" = xno], [ + ], [test "x$enable_pyzfs" != xno], [ enable_pyzfs=no ]) ]) @@ -70,7 +85,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ ZFS_AC_PYTHON_MODULE([cffi], [], [ AS_IF([test "x$enable_pyzfs" = xyes], [ AC_MSG_ERROR("Python $PYTHON_VERSION cffi is not installed") - ], [test ! "x$enable_pyzfs" = xno], [ + ], [test "x$enable_pyzfs" != xno], [ enable_pyzfs=no ]) ]) @@ -81,7 +96,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [ dnl # AS_IF([test "x$enable_pyzfs" = xcheck], [enable_pyzfs=yes]) - AM_CONDITIONAL([PYZFS_ENABLED], [test x$enable_pyzfs = xyes]) + AM_CONDITIONAL([PYZFS_ENABLED], [test "x$enable_pyzfs" = xyes]) AC_SUBST([PYZFS_ENABLED], [$enable_pyzfs]) AC_SUBST(pythonsitedir, [$PYTHON_SITE_PKG]) diff --git a/config/deb.am b/config/deb.am index e405547aa949..83059a923493 100644 --- a/config/deb.am +++ b/config/deb.am @@ -20,7 +20,7 @@ deb-kmod: deb-local rpm-kmod arch=`$(RPM) -qp $${name}-kmod-$${version}.src.rpm --qf %{arch} | tail -1`; \ debarch=`$(DPKG) --print-architecture`; \ pkg1=kmod-$${name}*$${version}.$${arch}.rpm; \ - fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch $$pkg1; \ + fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch $$pkg1 || exit 1; \ $(RM) $$pkg1 @@ -30,7 +30,7 @@ deb-dkms: deb-local rpm-dkms arch=`$(RPM) -qp $${name}-dkms-$${version}.src.rpm --qf %{arch} | tail -1`; \ debarch=`$(DPKG) --print-architecture`; \ pkg1=$${name}-dkms-$${version}.$${arch}.rpm; \ - fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch $$pkg1; \ + fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch $$pkg1 || exit 1; \ $(RM) $$pkg1 deb-utils: deb-local rpm-utils @@ -45,7 +45,7 @@ deb-utils: deb-local rpm-utils pkg5=libzpool2-$${version}.$${arch}.rpm; \ pkg6=libzfs2-devel-$${version}.$${arch}.rpm; \ pkg7=$${name}-test-$${version}.$${arch}.rpm; \ - pkg8=$${name}-dracut-$${version}.$${arch}.rpm; \ + pkg8=$${name}-dracut-$${version}.noarch.rpm; \ pkg9=$${name}-initramfs-$${version}.$${arch}.rpm; \ pkg10=`ls python*-pyzfs-$${version}* | tail -1`; \ ## Arguments need to be passed to dh_shlibdeps. Alien provides no mechanism @@ -63,7 +63,7 @@ deb-utils: deb-local rpm-utils env PATH=$${path_prepend}:$${PATH} \ fakeroot $(ALIEN) --bump=0 --scripts --to-deb --target=$$debarch \ $$pkg1 $$pkg2 $$pkg3 $$pkg4 $$pkg5 $$pkg6 $$pkg7 \ - $$pkg8 $$pkg9 $$pkg10; \ + $$pkg8 $$pkg9 $$pkg10 || exit 1; \ $(RM) $${path_prepend}/dh_shlibdeps; \ rmdir $${path_prepend}; \ $(RM) $$pkg1 $$pkg2 $$pkg3 $$pkg4 $$pkg5 $$pkg6 $$pkg7 \ diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4 index 5fff79a74c70..ebb02fb09a28 100644 --- a/config/kernel-fpu.m4 +++ b/config/kernel-fpu.m4 @@ -18,7 +18,8 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ #include ],[ ],[ - AC_DEFINE(HAVE_KERNEL_FPU_API_HEADER, 1, [kernel has asm/fpu/api.h]) + AC_DEFINE(HAVE_KERNEL_FPU_API_HEADER, 1, + [kernel has asm/fpu/api.h]) AC_MSG_RESULT(asm/fpu/api.h) ],[ AC_MSG_RESULT(i387.h & xcr.h) @@ -39,8 +40,10 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ kernel_fpu_end(); ], [kernel_fpu_begin], [arch/x86/kernel/fpu/core.c], [ AC_MSG_RESULT(kernel_fpu_*) - AC_DEFINE(HAVE_KERNEL_FPU, 1, [kernel has kernel_fpu_* functions]) - AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, [kernel exports FPU functions]) + AC_DEFINE(HAVE_KERNEL_FPU, 1, + [kernel has kernel_fpu_* functions]) + AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, + [kernel exports FPU functions]) ],[ ZFS_LINUX_TRY_COMPILE_SYMBOL([ #include diff --git a/config/kernel-spinlock.m4 b/config/kernel-spinlock.m4 deleted file mode 100644 index d6d6640070b5..000000000000 --- a/config/kernel-spinlock.m4 +++ /dev/null @@ -1,24 +0,0 @@ -dnl # -dnl # 2.6.36 API change, -dnl # The 'struct fs_struct->lock' was changed from a rwlock_t to -dnl # a spinlock_t to improve the fastpath performance. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_FS_STRUCT_SPINLOCK], [ - AC_MSG_CHECKING([whether struct fs_struct uses spinlock_t]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ - #include - #include - ],[ - static struct fs_struct fs; - spin_lock_init(&fs.lock); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FS_STRUCT_SPINLOCK, 1, - [struct fs_struct uses spinlock_t]) - ],[ - AC_MSG_RESULT(no) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel-timer.m4 b/config/kernel-timer.m4 index 4dc3f84ed47e..b0e1afa153ab 100644 --- a/config/kernel-timer.m4 +++ b/config/kernel-timer.m4 @@ -1,26 +1,51 @@ +dnl # 4.14-rc3 API change +dnl # https://lwn.net/Articles/735887/ dnl # -dnl # 4.15 API change -dnl # https://lkml.org/lkml/2017/11/25/90 dnl # Check if timer_list.func get passed a timer_list or an unsigned long dnl # (older kernels). Also sanity check the from_timer() and timer_setup() dnl # macros are available as well, since they will be used in the same newer dnl # kernels that support the new timer_list.func signature. dnl # -AC_DEFUN([ZFS_AC_KERNEL_TIMER_FUNCTION_TIMER_LIST], [ - AC_MSG_CHECKING([whether timer_list.function gets a timer_list]) +dnl # Also check for the existance of flags in struct timer_list, they were +dnl # added in 4.1-rc8 via 0eeda71bc30d. + +AC_DEFUN([ZFS_AC_KERNEL_TIMER_SETUP], [ + AC_MSG_CHECKING([whether timer_setup() is available]) tmp_flags="$EXTRA_KCFLAGS" EXTRA_KCFLAGS="-Werror" + ZFS_LINUX_TRY_COMPILE([ #include - void task_expire(struct timer_list *tl) {} + + struct my_task_timer { + struct timer_list timer; + int data; + }; + + void task_expire(struct timer_list *tl) + { + struct my_task_timer *task_timer = from_timer(task_timer, tl, timer); + task_timer->data = 42; + } + ],[ + struct my_task_timer task_timer; + timer_setup(&task_timer.timer, task_expire, 0); ],[ - #ifndef from_timer - #error "No from_timer() macro" - #endif + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KERNEL_TIMER_SETUP, 1, + [timer_setup() is available]) + ],[ + AC_MSG_RESULT(no) + ]) - struct timer_list timer; - timer.function = task_expire; - timer_setup(&timer, NULL, 0); + AC_MSG_CHECKING([whether timer function expects timer_list]) + + ZFS_LINUX_TRY_COMPILE([ + #include + void task_expire(struct timer_list *tl) {} + ],[ + struct timer_list tl; + tl.function = task_expire; ],[ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST, 1, @@ -28,5 +53,21 @@ AC_DEFUN([ZFS_AC_KERNEL_TIMER_FUNCTION_TIMER_LIST], [ ],[ AC_MSG_RESULT(no) ]) + + AC_MSG_CHECKING([whether struct timer_list has flags]) + + ZFS_LINUX_TRY_COMPILE([ + #include + ],[ + struct timer_list tl; + tl.flags = 2; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KERNEL_TIMER_LIST_FLAGS, 1, + [struct timer_list has a flags member]) + ],[ + AC_MSG_RESULT(no) + ]) + EXTRA_KCFLAGS="$tmp_flags" ]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 9a36302c0489..8e89c8014d8a 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -12,7 +12,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_CTL_NAME ZFS_AC_KERNEL_PDE_DATA ZFS_AC_KERNEL_2ARGS_VFS_FSYNC - ZFS_AC_KERNEL_FS_STRUCT_SPINLOCK ZFS_AC_KERNEL_KUIDGID_T ZFS_AC_KERNEL_FALLOCATE ZFS_AC_KERNEL_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE @@ -36,7 +35,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_GROUP_INFO_GID ZFS_AC_KERNEL_WRITE ZFS_AC_KERNEL_READ - ZFS_AC_KERNEL_TIMER_FUNCTION_TIMER_LIST + ZFS_AC_KERNEL_TIMER_SETUP ZFS_AC_KERNEL_DECLARE_EVENT_CLASS ZFS_AC_KERNEL_CURRENT_BIO_TAIL ZFS_AC_KERNEL_SUPER_USER_NS diff --git a/configure.ac b/configure.ac index db614084e37e..a3ac134ffccf 100644 --- a/configure.ac +++ b/configure.ac @@ -120,8 +120,10 @@ AC_CONFIG_FILES([ cmd/dbufstat/Makefile cmd/arc_summary/Makefile cmd/zed/Makefile + cmd/zed/zed.d/Makefile cmd/raidz_test/Makefile cmd/zgenhostid/Makefile + cmd/zvol_wait/Makefile contrib/Makefile contrib/bash_completion.d/Makefile contrib/dracut/Makefile @@ -271,6 +273,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/cli_user/zfs_list/Makefile tests/zfs-tests/tests/functional/cli_user/zpool_iostat/Makefile tests/zfs-tests/tests/functional/cli_user/zpool_list/Makefile + tests/zfs-tests/tests/functional/cli_user/zpool_status/Makefile tests/zfs-tests/tests/functional/compression/Makefile tests/zfs-tests/tests/functional/cp_files/Makefile tests/zfs-tests/tests/functional/ctime/Makefile @@ -326,6 +329,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/snapshot/Makefile tests/zfs-tests/tests/functional/snapused/Makefile tests/zfs-tests/tests/functional/sparse/Makefile + tests/zfs-tests/tests/functional/suid/Makefile tests/zfs-tests/tests/functional/alloc_class/Makefile tests/zfs-tests/tests/functional/threadsappend/Makefile tests/zfs-tests/tests/functional/tmpfile/Makefile diff --git a/contrib/dracut/90zfs/zfs-lib.sh.in b/contrib/dracut/90zfs/zfs-lib.sh.in index 23c07af9e86f..44021c6e5fc1 100755 --- a/contrib/dracut/90zfs/zfs-lib.sh.in +++ b/contrib/dracut/90zfs/zfs-lib.sh.in @@ -144,7 +144,7 @@ ask_for_password() { { flock -s 9; # Prompt for password with plymouth, if installed and running. - if whereis plymouth >/dev/null 2>&1 && plymouth --ping 2>/dev/null; then + if type plymouth >/dev/null 2>&1 && plymouth --ping 2>/dev/null; then plymouth ask-for-password \ --prompt "$ply_prompt" --number-of-tries="$ply_tries" \ --command="$ply_cmd" diff --git a/contrib/initramfs/Makefile.am b/contrib/initramfs/Makefile.am index 87ec7a86f5ac..9f912d946649 100644 --- a/contrib/initramfs/Makefile.am +++ b/contrib/initramfs/Makefile.am @@ -11,13 +11,18 @@ EXTRA_DIST = \ $(top_srcdir)/contrib/initramfs/README.initramfs.markdown install-initrdSCRIPTS: $(EXTRA_DIST) - for d in conf.d conf-hooks.d hooks scripts scripts/local-top; do \ - $(MKDIR_P) $(DESTDIR)$(initrddir)/$$d; \ - cp $(top_srcdir)/contrib/initramfs/$$d/zfs \ - $(DESTDIR)$(initrddir)/$$d/; \ + for d in conf.d conf-hooks.d scripts/local-top; do \ + $(MKDIR_P) $(DESTDIR)$(initrddir)/$$d; \ + cp $(top_srcdir)/contrib/initramfs/$$d/zfs \ + $(DESTDIR)$(initrddir)/$$d/; \ done - if [ -f etc/init.d/zfs ]; then \ - $(MKDIR_P) $(DESTDIR)$(DEFAULT_INITCONF_DIR); \ - cp $(top_srcdir)/etc/init.d/zfs \ - $(DESTDIR)$(DEFAULT_INITCONF_DIR)/; \ + for d in hooks scripts; do \ + $(MKDIR_P) $(DESTDIR)$(initrddir)/$$d; \ + cp $(top_builddir)/contrib/initramfs/$$d/zfs \ + $(DESTDIR)$(initrddir)/$$d/; \ + done + if [ -f $(top_builddir)/etc/init.d/zfs ]; then \ + $(MKDIR_P) $(DESTDIR)$(DEFAULT_INITCONF_DIR); \ + cp $(top_builddir)/etc/init.d/zfs \ + $(DESTDIR)$(DEFAULT_INITCONF_DIR)/; \ fi diff --git a/contrib/initramfs/scripts/zfs.in b/contrib/initramfs/scripts/zfs.in index ad604a82ce52..05410ea2bdce 100644 --- a/contrib/initramfs/scripts/zfs.in +++ b/contrib/initramfs/scripts/zfs.in @@ -411,29 +411,29 @@ decrypt_fs() # Determine dataset that holds key for root dataset ENCRYPTIONROOT=$(${ZFS} get -H -o value encryptionroot "${fs}") - DECRYPT_CMD="${ZFS} load-key '${ENCRYPTIONROOT}'" # If root dataset is encrypted... if ! [ "${ENCRYPTIONROOT}" = "-" ]; then - + TRY_COUNT=3 # Prompt with plymouth, if active if [ -e /bin/plymouth ] && /bin/plymouth --ping 2>/dev/null; then - plymouth ask-for-password --prompt "Encrypted ZFS password for ${ENCRYPTIONROOT}" \ - --number-of-tries="3" \ - --command="${DECRYPT_CMD}" + while [ $TRY_COUNT -gt 0 ]; do + plymouth ask-for-password --prompt "Encrypted ZFS password for ${ENCRYPTIONROOT}" | \ + $ZFS load-key "${ENCRYPTIONROOT}" && break + TRY_COUNT=$((TRY_COUNT - 1)) + done # Prompt with systemd, if active elif [ -e /run/systemd/system ]; then - TRY_COUNT=3 while [ $TRY_COUNT -gt 0 ]; do systemd-ask-password "Encrypted ZFS password for ${ENCRYPTIONROOT}" --no-tty | \ - ${DECRYPT_CMD} && break + $ZFS load-key "${ENCRYPTIONROOT}" && break TRY_COUNT=$((TRY_COUNT - 1)) done # Prompt with ZFS tty, otherwise else - eval "${DECRYPT_CMD}" + $ZFS load-key "${ENCRYPTIONROOT}" fi fi fi diff --git a/contrib/pyzfs/Makefile.am b/contrib/pyzfs/Makefile.am index 1549bf237932..fa1bb32ce2eb 100644 --- a/contrib/pyzfs/Makefile.am +++ b/contrib/pyzfs/Makefile.am @@ -24,7 +24,7 @@ all-local: # files are later created by manually loading the Python modules. # install-exec-local: - $(PYTHON) $(srcdir)/setup.py install \ + $(PYTHON) $(builddir)/setup.py install \ --prefix $(prefix) \ --root $(DESTDIR)/ \ --install-lib $(pythonsitedir) \ diff --git a/etc/init.d/zfs-functions.in b/etc/init.d/zfs-functions.in index 490503e91391..cbc7fd22a0a0 100644 --- a/etc/init.d/zfs-functions.in +++ b/etc/init.d/zfs-functions.in @@ -294,13 +294,6 @@ checksystem() # Just make sure that /dev/zfs is created. udev_trigger - if ! [ "$(uname -m)" = "x86_64" ]; then - echo "Warning: You're not running 64bit. Currently native zfs in"; - echo " Linux is only supported and tested on 64bit."; - # should we break here? People doing this should know what they - # do, thus i'm not breaking here. - fi - return 0 } diff --git a/etc/systemd/system-generators/zfs-mount-generator.in b/etc/systemd/system-generators/zfs-mount-generator.in index 5428eb25d92c..3e529cb67bb3 100755 --- a/etc/systemd/system-generators/zfs-mount-generator.in +++ b/etc/systemd/system-generators/zfs-mount-generator.in @@ -71,6 +71,8 @@ process_line() { p_readonly="${8}" p_setuid="${9}" p_nbmand="${10}" + p_encroot="${11}" + p_keyloc="${12}" # Check for canmount=off . if [ "${p_canmount}" = "off" ] ; then @@ -168,6 +170,54 @@ process_line() { "${dataset}" >/dev/kmsg fi + # Minimal pre-requisites to mount a ZFS dataset + wants="zfs-import.target" + if [ -n "${p_encroot}" ] && + [ "${p_encroot}" != "-" ] ; then + keyloadunit="zfs-load-key-$(systemd-escape "${p_encroot}").service" + if [ "${p_encroot}" = "${dataset}" ] ; then + pathdep="" + if [ "${p_keyloc%%://*}" = "file" ] ; then + pathdep="RequiresMountsFor='${p_keyloc#file://}'" + keyloadcmd="@sbindir@/zfs load-key '${dataset}'" + elif [ "${p_keyloc}" = "prompt" ] ; then + keyloadcmd="sh -c 'set -eu;"\ +"count=0;"\ +"while [ \$\$count -lt 3 ];do"\ +" systemd-ask-password --id=\"zfs:${dataset}\""\ +" \"Enter passphrase for ${dataset}:\"|"\ +" @sbindir@/zfs load-key \"${dataset}\" && exit 0;"\ +" count=\$\$((count + 1));"\ +"done;"\ +"exit 1'" + else + printf 'zfs-mount-generator: (%s) invalid keylocation\n' \ + "${dataset}" >/dev/kmsg + fi + cat > "${dest_norm}/${keyloadunit}" << EOF +# Automatically generated by zfs-mount-generator + +[Unit] +Description=Load ZFS key for ${dataset} +SourcePath=${cachefile} +Documentation=man:zfs-mount-generator(8) +DefaultDependencies=no +Wants=${wants} +After=${wants} +${pathdep} + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=${keyloadcmd} +ExecStop=@sbindir@/zfs unload-key '${dataset}' +EOF + fi + # Update the dependencies for the mount file to require the + # key-loading unit. + wants="${wants} ${keyloadunit}" + fi + # If the mountpoint has already been created, give it precedence. if [ -e "${dest_norm}/${mountfile}" ] ; then printf 'zfs-mount-generator: %s already exists\n' "${mountfile}" \ @@ -183,8 +233,8 @@ process_line() { SourcePath=${cachefile} Documentation=man:zfs-mount-generator(8) Before=local-fs.target zfs-mount.service -After=zfs-import.target -Wants=zfs-import.target +After=${wants} +Wants=${wants} [Mount] Where=${p_mountpoint} diff --git a/etc/systemd/system/50-zfs.preset.in b/etc/systemd/system/50-zfs.preset.in index 884a69b5b683..e4056a92cd98 100644 --- a/etc/systemd/system/50-zfs.preset.in +++ b/etc/systemd/system/50-zfs.preset.in @@ -5,4 +5,5 @@ enable zfs-import.target enable zfs-mount.service enable zfs-share.service enable zfs-zed.service +enable zfs-volume-wait.service enable zfs.target diff --git a/etc/systemd/system/Makefile.am b/etc/systemd/system/Makefile.am index 1586209caa6d..9249f15eb455 100644 --- a/etc/systemd/system/Makefile.am +++ b/etc/systemd/system/Makefile.am @@ -7,7 +7,9 @@ systemdunit_DATA = \ zfs-import-scan.service \ zfs-mount.service \ zfs-share.service \ + zfs-volume-wait.service \ zfs-import.target \ + zfs-volumes.target \ zfs.target EXTRA_DIST = \ @@ -17,6 +19,8 @@ EXTRA_DIST = \ $(top_srcdir)/etc/systemd/system/zfs-mount.service.in \ $(top_srcdir)/etc/systemd/system/zfs-share.service.in \ $(top_srcdir)/etc/systemd/system/zfs-import.target.in \ + $(top_srcdir)/etc/systemd/system/zfs-volume-wait.service.in \ + $(top_srcdir)/etc/systemd/system/zfs-volumes.target.in \ $(top_srcdir)/etc/systemd/system/zfs.target.in \ $(top_srcdir)/etc/systemd/system/50-zfs.preset.in diff --git a/etc/systemd/system/zfs-share.service.in b/etc/systemd/system/zfs-share.service.in index 75ff6e946767..5f4ba411b3cd 100644 --- a/etc/systemd/system/zfs-share.service.in +++ b/etc/systemd/system/zfs-share.service.in @@ -5,6 +5,7 @@ After=nfs-server.service nfs-kernel-server.service After=smb.service Before=rpc-statd-notify.service Wants=zfs-mount.service +After=zfs-mount.service PartOf=nfs-server.service nfs-kernel-server.service PartOf=smb.service diff --git a/etc/systemd/system/zfs-volume-wait.service.in b/etc/systemd/system/zfs-volume-wait.service.in new file mode 100644 index 000000000000..75bd9fcdd56c --- /dev/null +++ b/etc/systemd/system/zfs-volume-wait.service.in @@ -0,0 +1,13 @@ +[Unit] +Description=Wait for ZFS Volume (zvol) links in /dev +DefaultDependencies=no +After=systemd-udev-settle.service +After=zfs-import.target + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=@bindir@/zvol_wait + +[Install] +WantedBy=zfs-volumes.target diff --git a/etc/systemd/system/zfs-volumes.target.in b/etc/systemd/system/zfs-volumes.target.in new file mode 100644 index 000000000000..5cb9a10f49c5 --- /dev/null +++ b/etc/systemd/system/zfs-volumes.target.in @@ -0,0 +1,7 @@ +[Unit] +Description=ZFS volumes are ready +After=zfs-volume-wait.service +Requires=zfs-volume-wait.service + +[Install] +WantedBy=zfs.target diff --git a/include/libzfs.h b/include/libzfs.h index e2ec2d9bce7b..a5b2a8393f43 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -147,6 +147,7 @@ typedef enum zfs_error { EZFS_NO_TRIM, /* no active trim */ EZFS_TRIM_NOTSUP, /* device does not support trim */ EZFS_NO_RESILVER_DEFER, /* pool doesn't support resilver_defer */ + EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */ EZFS_UNKNOWN } zfs_error_t; diff --git a/include/linux/simd_aarch64.h b/include/linux/simd_aarch64.h index 155ef6205599..56153a16072e 100644 --- a/include/linux/simd_aarch64.h +++ b/include/linux/simd_aarch64.h @@ -26,8 +26,10 @@ * USER API: * * Kernel fpu methods: - * kfpu_begin() - * kfpu_end() + * kfpu_allowed() + * kfpu_initialize() + * kfpu_begin() + * kfpu_end() */ #ifndef _SIMD_AARCH64_H diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h index 12cd7467788e..0489bfaa3a70 100644 --- a/include/linux/simd_x86.h +++ b/include/linux/simd_x86.h @@ -26,8 +26,10 @@ * USER API: * * Kernel fpu methods: - * kfpu_begin() - * kfpu_end() + * kfpu_allowed() + * kfpu_initialize() + * kfpu_begin() + * kfpu_end() * * SIMD support: * @@ -37,31 +39,31 @@ * all relevant feature test functions should be called. * * Supported features: - * zfs_sse_available() - * zfs_sse2_available() - * zfs_sse3_available() - * zfs_ssse3_available() - * zfs_sse4_1_available() - * zfs_sse4_2_available() + * zfs_sse_available() + * zfs_sse2_available() + * zfs_sse3_available() + * zfs_ssse3_available() + * zfs_sse4_1_available() + * zfs_sse4_2_available() * - * zfs_avx_available() - * zfs_avx2_available() + * zfs_avx_available() + * zfs_avx2_available() * - * zfs_bmi1_available() - * zfs_bmi2_available() + * zfs_bmi1_available() + * zfs_bmi2_available() * - * zfs_avx512f_available() - * zfs_avx512cd_available() - * zfs_avx512er_available() - * zfs_avx512pf_available() - * zfs_avx512bw_available() - * zfs_avx512dq_available() - * zfs_avx512vl_available() - * zfs_avx512ifma_available() - * zfs_avx512vbmi_available() + * zfs_avx512f_available() + * zfs_avx512cd_available() + * zfs_avx512er_available() + * zfs_avx512pf_available() + * zfs_avx512bw_available() + * zfs_avx512dq_available() + * zfs_avx512vl_available() + * zfs_avx512ifma_available() + * zfs_avx512vbmi_available() * * NOTE(AVX-512VL): If using AVX-512 instructions with 128Bit registers - * also add zfs_avx512vl_available() to feature check. + * also add zfs_avx512vl_available() to feature check. */ #ifndef _SIMD_X86_H @@ -190,7 +192,7 @@ typedef struct cpuid_feature_desc { * Descriptions of supported instruction sets */ static const cpuid_feature_desc_t cpuid_features[] = { - [SSE] = {1U, 0U, 1U << 25, EDX }, + [SSE] = {1U, 0U, 1U << 25, EDX }, [SSE2] = {1U, 0U, 1U << 26, EDX }, [SSE3] = {1U, 0U, 1U << 0, ECX }, [SSSE3] = {1U, 0U, 1U << 9, ECX }, diff --git a/include/spl/sys/mutex.h b/include/spl/sys/mutex.h index ed0cd4932cfa..73da23685590 100644 --- a/include/spl/sys/mutex.h +++ b/include/spl/sys/mutex.h @@ -127,6 +127,8 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp) \ }) /* END CSTYLED */ +#define NESTED_SINGLE 1 + #ifdef CONFIG_DEBUG_LOCK_ALLOC #define mutex_enter_nested(mp, subclass) \ { \ @@ -179,7 +181,4 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp) \ /* NOTE: do not dereference mp after this point */ \ } -int spl_mutex_init(void); -void spl_mutex_fini(void); - #endif /* _SPL_MUTEX_H */ diff --git a/include/spl/sys/rwlock.h b/include/spl/sys/rwlock.h index 408defac20d3..60f5bfd986b4 100644 --- a/include/spl/sys/rwlock.h +++ b/include/spl/sys/rwlock.h @@ -29,43 +29,6 @@ #include #include -/* Linux kernel compatibility */ -#if defined(CONFIG_PREEMPT_RT_FULL) -#define SPL_RWSEM_SINGLE_READER_VALUE (1) -#define SPL_RWSEM_SINGLE_WRITER_VALUE (0) -#elif defined(CONFIG_RWSEM_GENERIC_SPINLOCK) -#define SPL_RWSEM_SINGLE_READER_VALUE (1) -#define SPL_RWSEM_SINGLE_WRITER_VALUE (-1) -#elif defined(RWSEM_ACTIVE_MASK) -#define SPL_RWSEM_SINGLE_READER_VALUE (RWSEM_ACTIVE_READ_BIAS) -#define SPL_RWSEM_SINGLE_WRITER_VALUE (RWSEM_ACTIVE_WRITE_BIAS) -#endif - -/* Linux 3.16 changed activity to count for rwsem-spinlock */ -#if defined(CONFIG_PREEMPT_RT_FULL) -#define RWSEM_COUNT(sem) sem->read_depth -#elif defined(HAVE_RWSEM_ACTIVITY) -#define RWSEM_COUNT(sem) sem->activity -/* Linux 4.8 changed count to an atomic_long_t for !rwsem-spinlock */ -#elif defined(HAVE_RWSEM_ATOMIC_LONG_COUNT) -#define RWSEM_COUNT(sem) atomic_long_read(&(sem)->count) -#else -#define RWSEM_COUNT(sem) sem->count -#endif - -#if defined(RWSEM_SPINLOCK_IS_RAW) -#define spl_rwsem_lock_irqsave(lk, fl) raw_spin_lock_irqsave(lk, fl) -#define spl_rwsem_unlock_irqrestore(lk, fl) \ - raw_spin_unlock_irqrestore(lk, fl) -#define spl_rwsem_trylock_irqsave(lk, fl) raw_spin_trylock_irqsave(lk, fl) -#else -#define spl_rwsem_lock_irqsave(lk, fl) spin_lock_irqsave(lk, fl) -#define spl_rwsem_unlock_irqrestore(lk, fl) spin_unlock_irqrestore(lk, fl) -#define spl_rwsem_trylock_irqsave(lk, fl) spin_trylock_irqsave(lk, fl) -#endif /* RWSEM_SPINLOCK_IS_RAW */ - -#define spl_rwsem_is_locked(rwsem) rwsem_is_locked(rwsem) - typedef enum { RW_DRIVER = 2, RW_DEFAULT = 4, @@ -78,15 +41,9 @@ typedef enum { RW_READER = 2 } krw_t; -/* - * If CONFIG_RWSEM_SPIN_ON_OWNER is defined, rw_semaphore will have an owner - * field, so we don't need our own. - */ typedef struct { struct rw_semaphore rw_rwlock; -#ifndef CONFIG_RWSEM_SPIN_ON_OWNER kthread_t *rw_owner; -#endif #ifdef CONFIG_LOCKDEP krw_type_t rw_type; #endif /* CONFIG_LOCKDEP */ @@ -97,31 +54,19 @@ typedef struct { static inline void spl_rw_set_owner(krwlock_t *rwp) { -/* - * If CONFIG_RWSEM_SPIN_ON_OWNER is defined, down_write, up_write, - * downgrade_write and __init_rwsem will set/clear owner for us. - */ -#ifndef CONFIG_RWSEM_SPIN_ON_OWNER rwp->rw_owner = current; -#endif } static inline void spl_rw_clear_owner(krwlock_t *rwp) { -#ifndef CONFIG_RWSEM_SPIN_ON_OWNER rwp->rw_owner = NULL; -#endif } static inline kthread_t * rw_owner(krwlock_t *rwp) { -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER - return (SEM(rwp)->owner); -#else return (rwp->rw_owner); -#endif } #ifdef CONFIG_LOCKDEP @@ -148,62 +93,22 @@ spl_rw_lockdep_on_maybe(krwlock_t *rwp) \ #define spl_rw_lockdep_on_maybe(rwp) #endif /* CONFIG_LOCKDEP */ - static inline int -RW_WRITE_HELD(krwlock_t *rwp) +RW_LOCK_HELD(krwlock_t *rwp) { - return (rw_owner(rwp) == current); + return (rwsem_is_locked(SEM(rwp))); } static inline int -RW_LOCK_HELD(krwlock_t *rwp) +RW_WRITE_HELD(krwlock_t *rwp) { - return (spl_rwsem_is_locked(SEM(rwp))); + return (rw_owner(rwp) == current); } static inline int RW_READ_HELD(krwlock_t *rwp) { - if (!RW_LOCK_HELD(rwp)) - return (0); - - /* - * rw_semaphore cheat sheet: - * - * < 3.16: - * There's no rw_semaphore.owner, so use rwp.owner instead. - * If rwp.owner == NULL then it's a reader - * - * 3.16 - 4.7: - * rw_semaphore.owner added (https://lwn.net/Articles/596656/) - * and CONFIG_RWSEM_SPIN_ON_OWNER introduced. - * If rw_semaphore.owner == NULL then it's a reader - * - * 4.8 - 4.16.16: - * RWSEM_READER_OWNED added as an internal #define. - * (https://lore.kernel.org/patchwork/patch/678590/) - * If rw_semaphore.owner == 1 then it's a reader - * - * 4.16.17 - 4.19: - * RWSEM_OWNER_UNKNOWN introduced as ((struct task_struct *)-1L) - * (https://do-db2.lkml.org/lkml/2018/5/15/985) - * If rw_semaphore.owner == 1 then it's a reader. - * - * 4.20+: - * RWSEM_OWNER_UNKNOWN changed to ((struct task_struct *)-2L) - * (https://lkml.org/lkml/2018/9/6/986) - * If rw_semaphore.owner & 1 then it's a reader, and also the reader's - * task_struct may be embedded in rw_semaphore->owner. - */ -#if defined(CONFIG_RWSEM_SPIN_ON_OWNER) && defined(RWSEM_OWNER_UNKNOWN) - if (RWSEM_OWNER_UNKNOWN == (struct task_struct *)-2L) { - /* 4.20+ kernels with CONFIG_RWSEM_SPIN_ON_OWNER */ - return ((unsigned long) SEM(rwp)->owner & 1); - } -#endif - - /* < 4.20 kernel or !CONFIG_RWSEM_SPIN_ON_OWNER */ - return (rw_owner(rwp) == NULL || (unsigned long) rw_owner(rwp) == 1); + return (RW_LOCK_HELD(rwp) && rw_owner(rwp) == NULL); } /* @@ -228,6 +133,12 @@ RW_READ_HELD(krwlock_t *rwp) */ #define rw_destroy(rwp) ((void) 0) +/* + * Upgrading a rwsem from a reader to a writer is not supported by the + * Linux kernel. The lock must be dropped and reacquired as a writer. + */ +#define rw_tryupgrade(rwp) RW_WRITE_HELD(rwp) + #define rw_tryenter(rwp, rw) \ ({ \ int _rc_ = 0; \ @@ -285,25 +196,6 @@ RW_READ_HELD(krwlock_t *rwp) downgrade_write(SEM(rwp)); \ spl_rw_lockdep_on_maybe(rwp); \ }) - -#define rw_tryupgrade(rwp) \ -({ \ - int _rc_ = 0; \ - \ - if (RW_WRITE_HELD(rwp)) { \ - _rc_ = 1; \ - } else { \ - spl_rw_lockdep_off_maybe(rwp); \ - if ((_rc_ = rwsem_tryupgrade(SEM(rwp)))) \ - spl_rw_set_owner(rwp); \ - spl_rw_lockdep_on_maybe(rwp); \ - } \ - _rc_; \ -}) /* END CSTYLED */ -int spl_rw_init(void); -void spl_rw_fini(void); -int rwsem_tryupgrade(struct rw_semaphore *rwsem); - #endif /* _SPL_RWLOCK_H */ diff --git a/include/spl/sys/timer.h b/include/spl/sys/timer.h index a6b134570cd8..31d89d3b97d6 100644 --- a/include/spl/sys/timer.h +++ b/include/spl/sys/timer.h @@ -72,4 +72,29 @@ usleep_range(unsigned long min, unsigned long max) #define USEC_TO_TICK(us) usecs_to_jiffies(us) #define NSEC_TO_TICK(ns) usecs_to_jiffies(ns / NSEC_PER_USEC) +#ifndef from_timer +#define from_timer(var, timer, timer_field) \ + container_of(timer, typeof(*var), timer_field) +#endif + +#ifdef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST +typedef struct timer_list *spl_timer_list_t; +#else +typedef unsigned long spl_timer_list_t; +#endif + +#ifndef HAVE_KERNEL_TIMER_SETUP + +static inline void +timer_setup(struct timer_list *timer, void (*func)(spl_timer_list_t), u32 fl) +{ +#ifdef HAVE_KERNEL_TIMER_LIST_FLAGS + (timer)->flags = fl; +#endif + init_timer(timer); + setup_timer(timer, func, (spl_timer_list_t)(timer)); +} + +#endif /* HAVE_KERNEL_TIMER_SETUP */ + #endif /* _SPL_TIMER_H */ diff --git a/include/spl/sys/vnode.h b/include/spl/sys/vnode.h index 71278b08c867..7bd278e4e13b 100644 --- a/include/spl/sys/vnode.h +++ b/include/spl/sys/vnode.h @@ -182,7 +182,6 @@ extern int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag, extern file_t *vn_getf(int fd); extern void vn_releasef(int fd); extern void vn_areleasef(int fd, uf_info_t *fip); -extern int vn_set_pwd(const char *filename); int spl_vn_init(void); void spl_vn_fini(void); diff --git a/include/sys/dnode.h b/include/sys/dnode.h index c60258bbc768..e97e40373b4d 100644 --- a/include/sys/dnode.h +++ b/include/sys/dnode.h @@ -46,6 +46,7 @@ extern "C" { */ #define DNODE_MUST_BE_ALLOCATED 1 #define DNODE_MUST_BE_FREE 2 +#define DNODE_DRY_RUN 4 /* * dnode_next_offset() flags. @@ -415,6 +416,7 @@ int dnode_hold_impl(struct objset *dd, uint64_t object, int flag, int dn_slots, boolean_t dnode_add_ref(dnode_t *dn, void *ref); void dnode_rele(dnode_t *dn, void *ref); void dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting); +int dnode_try_claim(objset_t *os, uint64_t object, int slots); void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx); void dnode_sync(dnode_t *dn, dmu_tx_t *tx); void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, @@ -532,11 +534,6 @@ typedef struct dnode_stats { * a range of dnode slots which would overflow the dnode_phys_t. */ kstat_named_t dnode_hold_free_overflow; - /* - * Number of times a dnode_hold(...) was attempted on a dnode - * which had already been unlinked in an earlier txg. - */ - kstat_named_t dnode_hold_free_txg; /* * Number of times dnode_free_interior_slots() needed to retry * acquiring a slot zrl lock due to contention. diff --git a/include/sys/dsl_bookmark.h b/include/sys/dsl_bookmark.h index 3cdad7441407..ea7d70cf3232 100644 --- a/include/sys/dsl_bookmark.h +++ b/include/sys/dsl_bookmark.h @@ -37,9 +37,11 @@ typedef struct zfs_bookmark_phys { uint64_t zbm_creation_txg; /* birth transaction group */ uint64_t zbm_creation_time; /* bookmark creation time */ - /* the following fields are reserved for redacted send / recv */ + /* fields used for redacted send / recv */ uint64_t zbm_redaction_obj; /* redaction list object */ uint64_t zbm_flags; /* ZBM_FLAG_* */ + + /* fields used for bookmark written size */ uint64_t zbm_referenced_bytes_refd; uint64_t zbm_compressed_bytes_refd; uint64_t zbm_uncompressed_bytes_refd; diff --git a/include/sys/dsl_crypt.h b/include/sys/dsl_crypt.h index c2c0a548a488..0f73ea6c6df8 100644 --- a/include/sys/dsl_crypt.h +++ b/include/sys/dsl_crypt.h @@ -209,7 +209,6 @@ void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd, struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx); uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, dmu_tx_t *tx); -int dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd); uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx); void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx); diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 3bcefdbfd775..c167a594a7d4 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -1318,6 +1318,7 @@ typedef enum { ZFS_ERR_FROM_IVSET_GUID_MISSING, ZFS_ERR_FROM_IVSET_GUID_MISMATCH, ZFS_ERR_SPILL_BLOCK_FLAG_MISSING, + ZFS_ERR_EXPORT_IN_PROGRESS, } zfs_errno_t; /* diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h index cd05ccb5bc04..9fd52e110b9f 100644 --- a/include/sys/metaslab.h +++ b/include/sys/metaslab.h @@ -50,6 +50,7 @@ int metaslab_init(metaslab_group_t *, uint64_t, uint64_t, uint64_t, void metaslab_fini(metaslab_t *); int metaslab_load(metaslab_t *); +void metaslab_potentially_unload(metaslab_t *, uint64_t); void metaslab_unload(metaslab_t *); uint64_t metaslab_allocated_space(metaslab_t *); diff --git a/include/sys/multilist.h b/include/sys/multilist.h index 439540685971..0c7b4075d9a3 100644 --- a/include/sys/multilist.h +++ b/include/sys/multilist.h @@ -89,6 +89,8 @@ void multilist_sublist_insert_head(multilist_sublist_t *, void *); void multilist_sublist_insert_tail(multilist_sublist_t *, void *); void multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj); void multilist_sublist_remove(multilist_sublist_t *, void *); +int multilist_sublist_is_empty(multilist_sublist_t *); +int multilist_sublist_is_empty_idx(multilist_t *, unsigned int); void *multilist_sublist_head(multilist_sublist_t *); void *multilist_sublist_tail(multilist_sublist_t *); diff --git a/include/sys/pathname.h b/include/sys/pathname.h index 5db69b1784c9..d79cc5c01afd 100644 --- a/include/sys/pathname.h +++ b/include/sys/pathname.h @@ -54,8 +54,10 @@ extern "C" { */ typedef struct pathname { char *pn_buf; /* underlying storage */ +#if 0 /* unused in ZoL */ char *pn_path; /* remaining pathname */ size_t pn_pathlen; /* remaining length */ +#endif size_t pn_bufsize; /* total size of pn_buf */ } pathname_t; diff --git a/include/sys/spa.h b/include/sys/spa.h index 1b9c46d104b1..e6f11077b349 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -1126,7 +1126,7 @@ extern uint64_t spa_missing_tvds_allowed(spa_t *spa); extern void spa_set_missing_tvds(spa_t *spa, uint64_t missing); extern boolean_t spa_top_vdevs_spacemap_addressable(spa_t *spa); extern boolean_t spa_multihost(spa_t *spa); -extern unsigned long spa_get_hostid(void); +extern uint32_t spa_get_hostid(spa_t *spa); extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *); extern int spa_mode(spa_t *spa); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index af37724d016e..7c152dd7f4ad 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -219,6 +219,7 @@ struct spa { spa_taskqs_t spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES]; dsl_pool_t *spa_dsl_pool; boolean_t spa_is_initializing; /* true while opening pool */ + boolean_t spa_is_exporting; /* true while exporting pool */ metaslab_class_t *spa_normal_class; /* normal data class */ metaslab_class_t *spa_log_class; /* intent log data class */ metaslab_class_t *spa_special_class; /* special allocation class */ @@ -395,6 +396,7 @@ struct spa { mmp_thread_t spa_mmp; /* multihost mmp thread */ list_t spa_leaf_list; /* list of leaf vdevs */ uint64_t spa_leaf_list_gen; /* track leaf_list changes */ + uint32_t spa_hostid; /* cached system hostid */ /* * spa_refcount & spa_config_lock must be the last elements diff --git a/include/sys/vdev_removal.h b/include/sys/vdev_removal.h index 3962237afdab..e3bab0658d62 100644 --- a/include/sys/vdev_removal.h +++ b/include/sys/vdev_removal.h @@ -14,7 +14,7 @@ */ /* - * Copyright (c) 2014, 2017 by Delphix. All rights reserved. + * Copyright (c) 2014, 2019 by Delphix. All rights reserved. */ #ifndef _SYS_VDEV_REMOVAL_H @@ -81,13 +81,13 @@ extern void spa_vdev_condense_suspend(spa_t *); extern int spa_vdev_remove(spa_t *, uint64_t, boolean_t); extern void free_from_removing_vdev(vdev_t *, uint64_t, uint64_t); extern int spa_removal_get_stats(spa_t *, pool_removal_stat_t *); -extern void svr_sync(spa_t *spa, dmu_tx_t *tx); +extern void svr_sync(spa_t *, dmu_tx_t *); extern void spa_vdev_remove_suspend(spa_t *); extern int spa_vdev_remove_cancel(spa_t *); -extern void spa_vdev_removal_destroy(spa_vdev_removal_t *svr); +extern void spa_vdev_removal_destroy(spa_vdev_removal_t *); +extern uint64_t spa_remove_max_segment(spa_t *); extern int vdev_removal_max_span; -extern int zfs_remove_max_segment; #ifdef __cplusplus } diff --git a/include/sys/zap.h b/include/sys/zap.h index ab13652d8c07..b19b4643879c 100644 --- a/include/sys/zap.h +++ b/include/sys/zap.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. */ @@ -350,6 +350,7 @@ typedef struct zap_cursor { uint64_t zc_serialized; uint64_t zc_hash; uint32_t zc_cd; + boolean_t zc_prefetch; } zap_cursor_t; typedef struct { @@ -375,7 +376,9 @@ typedef struct { * Initialize a zap cursor, pointing to the "first" attribute of the * zapobj. You must _fini the cursor when you are done with it. */ -void zap_cursor_init(zap_cursor_t *zc, objset_t *ds, uint64_t zapobj); +void zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj); +void zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, + uint64_t zapobj); void zap_cursor_fini(zap_cursor_t *zc); /* diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index dcb2edeb1d8f..563e63d0ed7b 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -258,6 +258,8 @@ extern void mutex_enter(kmutex_t *mp); extern void mutex_exit(kmutex_t *mp); extern int mutex_tryenter(kmutex_t *mp); +#define NESTED_SINGLE 1 +#define mutex_enter_nested(mp, class) mutex_enter(mp) /* * RW locks */ diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index d4a3ea769331..01b358cc4da8 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -196,6 +196,7 @@ typedef struct znode { uint8_t z_atime_dirty; /* atime needs to be synced */ uint8_t z_zn_prefetch; /* Prefetch znodes? */ uint8_t z_moved; /* Has this znode been moved? */ + boolean_t z_suspended; /* extra ref from a suspend? */ uint_t z_blksz; /* block size in bytes */ uint_t z_seq; /* modification sequence number */ uint64_t z_mapcnt; /* number of pages mapped to file */ @@ -371,7 +372,7 @@ extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp, vattr_t *vap); extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, - znode_t *dzp, char *name, uint64_t foid); + znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked); #define ZFS_NO_OBJECT 0 /* no object id */ extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *dzp, znode_t *zp, char *name); diff --git a/include/sys/zio_compress.h b/include/sys/zio_compress.h index 1642823d3d42..208117eee4b5 100644 --- a/include/sys/zio_compress.h +++ b/include/sys/zio_compress.h @@ -105,8 +105,7 @@ extern size_t lz4_compress_zfs(void *src, void *dst, size_t s_len, size_t d_len, int level); extern int lz4_decompress_zfs(void *src, void *dst, size_t s_len, size_t d_len, int level); -extern int lz4_decompress_abd(abd_t *src, void *dst, size_t s_len, size_t d_len, - int level); + /* * Compress and decompress data if necessary. */ diff --git a/include/zfs_namecheck.h b/include/zfs_namecheck.h index 527db92b0cfa..56d3d36f026e 100644 --- a/include/zfs_namecheck.h +++ b/include/zfs_namecheck.h @@ -43,6 +43,8 @@ typedef enum { NAME_ERR_RESERVED, /* entire name is reserved */ NAME_ERR_DISKLIKE, /* reserved disk name (c[0-9].*) */ NAME_ERR_TOOLONG, /* name is too long */ + NAME_ERR_SELF_REF, /* reserved self path name ('.') */ + NAME_ERR_PARENT_REF, /* reserved parent path name ('..') */ NAME_ERR_NO_AT, /* permission set is missing '@' */ } namecheck_err_t; diff --git a/lib/libzfs/libzfs.pc.in b/lib/libzfs/libzfs.pc.in index 0e83f7a64be0..1122401a6eb9 100644 --- a/lib/libzfs/libzfs.pc.in +++ b/lib/libzfs/libzfs.pc.in @@ -9,4 +9,4 @@ Version: @VERSION@ URL: http://zfsonlinux.org Requires: libzfs_core Cflags: -I${includedir}/libzfs -I${includedir}/libspl -Libs: -L${libdir} -lzfs +Libs: -L${libdir} -lzfs -lnvpair diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c index 3101febc1605..72f641056edc 100644 --- a/lib/libzfs/libzfs_changelist.c +++ b/lib/libzfs/libzfs_changelist.c @@ -475,9 +475,10 @@ change_one(zfs_handle_t *zhp, void *data) prop_changelist_t *clp = data; char property[ZFS_MAXPROPLEN]; char where[64]; - prop_changenode_t *cn; + prop_changenode_t *cn = NULL; zprop_source_t sourcetype = ZPROP_SRC_NONE; zprop_source_t share_sourcetype = ZPROP_SRC_NONE; + int ret = 0; /* * We only want to unmount/unshare those filesystems that may inherit @@ -493,8 +494,7 @@ change_one(zfs_handle_t *zhp, void *data) zfs_prop_get(zhp, clp->cl_prop, property, sizeof (property), &sourcetype, where, sizeof (where), B_FALSE) != 0) { - zfs_close(zhp); - return (0); + goto out; } /* @@ -506,8 +506,7 @@ change_one(zfs_handle_t *zhp, void *data) zfs_prop_get(zhp, clp->cl_shareprop, property, sizeof (property), &share_sourcetype, where, sizeof (where), B_FALSE) != 0) { - zfs_close(zhp); - return (0); + goto out; } if (clp->cl_alldependents || clp->cl_allchildren || @@ -518,8 +517,8 @@ change_one(zfs_handle_t *zhp, void *data) share_sourcetype == ZPROP_SRC_INHERITED))) { if ((cn = zfs_alloc(zfs_get_handle(zhp), sizeof (prop_changenode_t))) == NULL) { - zfs_close(zhp); - return (-1); + ret = -1; + goto out; } cn->cn_handle = zhp; @@ -541,16 +540,23 @@ change_one(zfs_handle_t *zhp, void *data) uu_avl_insert(clp->cl_tree, cn, idx); } else { free(cn); - zfs_close(zhp); + cn = NULL; } if (!clp->cl_alldependents) - return (zfs_iter_children(zhp, change_one, data)); - } else { - zfs_close(zhp); + ret = zfs_iter_children(zhp, change_one, data); + + /* + * If we added the handle to the changelist, we will re-use it + * later so return without closing it. + */ + if (cn != NULL) + return (ret); } - return (0); +out: + zfs_close(zhp); + return (ret); } static int diff --git a/lib/libzfs/libzfs_crypto.c b/lib/libzfs/libzfs_crypto.c index 3318a6bd2e11..d31f43b1fdf2 100644 --- a/lib/libzfs/libzfs_crypto.c +++ b/lib/libzfs/libzfs_crypto.c @@ -740,14 +740,6 @@ zfs_crypto_create(libzfs_handle_t *hdl, char *parent_name, nvlist_t *props, pcrypt = ZIO_CRYPT_OFF; } - /* Check for encryption being explicitly truned off */ - if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) { - ret = EINVAL; - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Invalid encryption value. Dataset must be encrypted.")); - goto out; - } - /* Get the inherited encryption property if we don't have it locally */ if (!local_crypt) crypt = pcrypt; @@ -849,10 +841,7 @@ int zfs_crypto_clone_check(libzfs_handle_t *hdl, zfs_handle_t *origin_zhp, char *parent_name, nvlist_t *props) { - int ret; char errbuf[1024]; - zfs_handle_t *pzhp = NULL; - uint64_t pcrypt, ocrypt; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "Encryption clone error")); @@ -865,40 +854,12 @@ zfs_crypto_clone_check(libzfs_handle_t *hdl, zfs_handle_t *origin_zhp, nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_KEYLOCATION)) || nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION)) || nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS))) { - ret = EINVAL; zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Encryption properties must inherit from origin dataset.")); - goto out; - } - - /* get a reference to parent dataset, should never be NULL */ - pzhp = make_dataset_handle(hdl, parent_name); - if (pzhp == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Failed to lookup parent.")); - return (ENOENT); + return (EINVAL); } - /* Lookup parent's crypt */ - pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION); - ocrypt = zfs_prop_get_int(origin_zhp, ZFS_PROP_ENCRYPTION); - - /* all children of encrypted parents must be encrypted */ - if (pcrypt != ZIO_CRYPT_OFF && ocrypt == ZIO_CRYPT_OFF) { - ret = EINVAL; - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Cannot create unencrypted clone as a child " - "of encrypted parent.")); - goto out; - } - - zfs_close(pzhp); return (0); - -out: - if (pzhp != NULL) - zfs_close(pzhp); - return (ret); } typedef struct loadkeys_cbdata { diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index f1f1d4d72605..a5d7f97d2e66 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -197,6 +197,16 @@ zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, "reserved disk name")); break; + case NAME_ERR_SELF_REF: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "self reference, '.' is found in name")); + break; + + case NAME_ERR_PARENT_REF: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "parent reference, '..' is found in name")); + break; + default: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "(%d) not defined"), why); @@ -4116,6 +4126,16 @@ zfs_promote(zfs_handle_t *zhp) if (ret != 0) { switch (ret) { + case EACCES: + /* + * Promoting encrypted dataset outside its + * encryption root. + */ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot promote dataset outside its " + "encryption root")); + return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + case EEXIST: /* There is a conflicting snapshot name. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -4479,8 +4499,6 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, zfs_cmd_t zc = {"\0"}; char *delim; prop_changelist_t *cl = NULL; - zfs_handle_t *zhrp = NULL; - char *parentname = NULL; char parent[ZFS_MAX_DATASET_NAME_LEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[1024]; @@ -4575,7 +4593,8 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, } if (recursive) { - parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); + zfs_handle_t *zhrp; + char *parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); if (parentname == NULL) { ret = -1; goto error; @@ -4583,10 +4602,12 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, delim = strchr(parentname, '@'); *delim = '\0'; zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_DATASET); + free(parentname); if (zhrp == NULL) { ret = -1; goto error; } + zfs_close(zhrp); } else if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT) { if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, CL_GATHER_ITER_MOUNTED, @@ -4630,16 +4651,9 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, "with the new name")); (void) zfs_error(hdl, EZFS_EXISTS, errbuf); } else if (errno == EACCES) { - if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) == - ZIO_CRYPT_OFF) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "cannot rename an unencrypted dataset to " - "be a decendent of an encrypted one")); - } else { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "cannot move encryption child outside of " - "its encryption root")); - } + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot move encrypted child outside of " + "its encryption root")); (void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf); } else { (void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf); @@ -4659,12 +4673,6 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, } error: - if (parentname != NULL) { - free(parentname); - } - if (zhrp != NULL) { - zfs_close(zhrp); - } if (cl != NULL) { changelist_free(cl); } diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c index 9d2a80939a58..9d6a0183e8a2 100644 --- a/lib/libzfs/libzfs_mount.c +++ b/lib/libzfs/libzfs_mount.c @@ -1302,12 +1302,14 @@ mountpoint_cmp(const void *arga, const void *argb) } /* - * Return true if path2 is a child of path1. + * Return true if path2 is a child of path1 or path2 equals path1 or + * path1 is "/" (path2 is always a child of "/"). */ static boolean_t libzfs_path_contains(const char *path1, const char *path2) { - return (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/'); + return (strcmp(path1, path2) == 0 || strcmp(path1, "/") == 0 || + (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/')); } /* diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index f69a46430bbe..d967e043b4e5 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -2827,7 +2827,7 @@ recv_fix_encryption_hierarchy(libzfs_handle_t *hdl, const char *destname, is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0'; (void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL); - /* we don't need to do anything for unencrypted filesystems */ + /* we don't need to do anything for unencrypted datasets */ if (crypt == ZIO_CRYPT_OFF) { zfs_close(zhp); continue; @@ -3992,11 +3992,18 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, } } else { /* - * if the fs does not exist, look for it based on the - * fromsnap GUID + * If the fs does not exist, look for it based on the + * fromsnap GUID. */ - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot receive incremental stream")); + if (resuming) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot receive resume stream")); + } else { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot receive incremental stream")); + } (void) strcpy(name, destsnap); *strchr(name, '@') = '\0'; @@ -4210,34 +4217,6 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, goto out; } - /* - * It is invalid to receive a properties stream that was - * unencrypted on the send side as a child of an encrypted - * parent. Technically there is nothing preventing this, but - * it would mean that the encryption=off property which is - * locally set on the send side would not be received correctly. - * We can infer encryption=off if the stream is not raw and - * properties were included since the send side will only ever - * send the encryption property in a raw nvlist header. This - * check will be avoided if the user specifically overrides - * the encryption property on the command line. - */ - if (!raw && rcvprops != NULL && - !nvlist_exists(cmdprops, - zfs_prop_to_name(ZFS_PROP_ENCRYPTION))) { - uint64_t crypt; - - crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); - - if (crypt != ZIO_CRYPT_OFF) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "parent '%s' must not be encrypted to " - "receive unenecrypted property"), name); - err = zfs_error(hdl, EZFS_BADPROP, errbuf); - zfs_close(zhp); - goto out; - } - } zfs_close(zhp); newfs = B_TRUE; @@ -4274,6 +4253,24 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, &oxprops, &wkeydata, &wkeylen, errbuf)) != 0) goto out; + /* + * When sending with properties (zfs send -p), the encryption property + * is not included because it is a SETONCE property and therefore + * treated as read only. However, we are always able to determine its + * value because raw sends will include it in the DRR_BDEGIN payload + * and non-raw sends with properties are not allowed for encrypted + * datasets. Therefore, if this is a non-raw properties stream, we can + * infer that the value should be ZIO_CRYPT_OFF and manually add that + * to the received properties. + */ + if (stream_wantsnewfs && !raw && rcvprops != NULL && + !nvlist_exists(cmdprops, zfs_prop_to_name(ZFS_PROP_ENCRYPTION))) { + if (oxprops == NULL) + oxprops = fnvlist_alloc(); + fnvlist_add_uint64(oxprops, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), ZIO_CRYPT_OFF); + } + err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops, oxprops, wkeydata, wkeylen, origin, flags->force, flags->resumable, raw, infd, drr_noswap, cleanup_fd, &read_bytes, &errflags, @@ -4428,14 +4425,15 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, *cp = '@'; break; case EINVAL: - if (flags->resumable) + if (flags->resumable) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "kernel modules must be upgraded to " "receive this stream.")); - if (embedded && !raw) + } else if (embedded && !raw) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incompatible embedded data stream " "feature with encrypted receive.")); + } (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ECKSUM: diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 19bb57ad4378..eed6282ca357 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -303,6 +303,8 @@ libzfs_error_description(libzfs_handle_t *hdl) case EZFS_NO_RESILVER_DEFER: return (dgettext(TEXT_DOMAIN, "this action requires the " "resilver_defer feature")); + case EZFS_EXPORT_IN_PROGRESS: + return (dgettext(TEXT_DOMAIN, "pool export in progress")); case EZFS_UNKNOWN: return (dgettext(TEXT_DOMAIN, "unknown error")); default: @@ -598,6 +600,9 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) case ZFS_ERR_VDEV_TOO_BIG: zfs_verror(hdl, EZFS_VDEV_TOO_BIG, fmt, ap); break; + case ZFS_ERR_EXPORT_IN_PROGRESS: + zfs_verror(hdl, EZFS_EXPORT_IN_PROGRESS, fmt, ap); + break; case ZFS_ERR_IOC_CMD_UNAVAIL: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs " "module does not support this operation. A reboot may " @@ -1134,7 +1139,7 @@ int zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len) { if (len == 0) - len = 16 * 1024; + len = 256 * 1024; zc->zc_nvlist_dst_size = len; zc->zc_nvlist_dst = (uint64_t)(uintptr_t)zfs_alloc(hdl, zc->zc_nvlist_dst_size); diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c index 99fc84d04614..eb332bc94e8c 100644 --- a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -52,7 +52,7 @@ * * - Thin Layer. libzfs_core is a thin layer, marshaling arguments * to/from the kernel ioctls. There is generally a 1:1 correspondence - * between libzfs_core functions and ioctls to /dev/zfs. + * between libzfs_core functions and ioctls to ZFS_DEV. * * - Clear Atomicity. Because libzfs_core functions are generally 1:1 * with kernel ioctls, and kernel ioctls are general atomic, each @@ -135,7 +135,7 @@ libzfs_core_init(void) { (void) pthread_mutex_lock(&g_lock); if (g_refcount == 0) { - g_fd = open("/dev/zfs", O_RDWR); + g_fd = open(ZFS_DEV, O_RDWR); if (g_fd < 0) { (void) pthread_mutex_unlock(&g_lock); return (errno); @@ -499,7 +499,7 @@ lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl) * The snapshots must all be in the same pool. * The value is the name of the hold (string type). * - * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL). + * If cleanup_fd is not -1, it must be the result of open(ZFS_DEV, O_EXCL). * In this case, when the cleanup_fd is closed (including on process * termination), the holds will be released. If the system is shut down * uncleanly, the holds will be released when the pool is next opened diff --git a/lib/libzpool/util.c b/lib/libzpool/util.c index ad05d2239ae0..67bc209ceec9 100644 --- a/lib/libzpool/util.c +++ b/lib/libzpool/util.c @@ -223,7 +223,7 @@ pool_active(void *unused, const char *name, uint64_t guid, * Use ZFS_IOC_POOL_SYNC to confirm if a pool is active */ - fd = open("/dev/zfs", O_RDWR); + fd = open(ZFS_DEV, O_RDWR); if (fd < 0) return (-1); diff --git a/man/man1/Makefile.am b/man/man1/Makefile.am index bd78be1452a8..2af917fa5c2e 100644 --- a/man/man1/Makefile.am +++ b/man/man1/Makefile.am @@ -1,4 +1,4 @@ -dist_man_MANS = zhack.1 ztest.1 raidz_test.1 +dist_man_MANS = zhack.1 ztest.1 raidz_test.1 zvol_wait.1 EXTRA_DIST = cstyle.1 install-data-local: diff --git a/man/man1/zvol_wait.1 b/man/man1/zvol_wait.1 new file mode 100644 index 000000000000..0366da5376d3 --- /dev/null +++ b/man/man1/zvol_wait.1 @@ -0,0 +1,21 @@ +.Dd July 5, 2019 +.Dt ZVOL_WAIT 1 SMM +.Os Linux +.Sh NAME +.Nm zvol_wait +.Nd Wait for ZFS volume links in +.Em /dev +to be created. +.Sh SYNOPSIS +.Nm +.Sh DESCRIPTION +When a ZFS pool is imported, ZFS will register each ZFS volume +(zvol) as a disk device with the system. As the disks are registered, +.Xr \fBudev 7\fR +will asynchronously create symlinks under +.Em /dev/zvol +using the zvol's name. +.Nm +will wait for all those symlinks to be created before returning. +.Sh SEE ALSO +.Xr \fBudev 7\fR diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 5bca12e06ea2..8ad3ce466ce5 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -104,6 +104,18 @@ to a log2 fraction of the target arc size. Default value: \fB6\fR. .RE +.sp +.ne 2 +.na +\fBdmu_prefetch_max\fR (int) +.ad +.RS 12n +Limit the amount we can prefetch with one call to this amount (in bytes). +This helps to limit the amount of memory that can be used by prefetching. +.sp +Default value: \fB134,217,728\fR (128MB). +.RE + .sp .ne 2 .na @@ -502,6 +514,19 @@ regular reads (but there's no reason it has to be the same). Default value: \fB32,768\fR. .RE +.sp +.ne 2 +.na +\fBzap_iterate_prefetch\fR (int) +.ad +.RS 12n +If this is set, when we start iterating over a ZAP object, zfs will prefetch +the entire object (all leaf blocks). However, this is limited by +\fBdmu_prefetch_max\fR. +.sp +Use \fB1\fR for on (default) and \fB0\fR for off. +.RE + .sp .ne 2 .na @@ -1817,7 +1842,7 @@ this value. If a metaslab group exceeds this threshold then it will be skipped unless all metaslab groups within the metaslab class have also crossed this threshold. .sp -Default value: \fB85\fR. +Default value: \fB95\fR. .RE .sp @@ -2169,6 +2194,33 @@ pool cannot be returned to a healthy state prior to removing the device. Default value: \fB0\fR. .RE +.sp +.ne 2 +.na +\fBzfs_removal_suspend_progress\fR (int) +.ad +.RS 12n +.sp +This is used by the test suite so that it can ensure that certain actions +happen while in the middle of a removal. +.sp +Default value: \fB0\fR. +.RE + +.sp +.ne 2 +.na +\fBzfs_remove_max_segment\fR (int) +.ad +.RS 12n +.sp +The largest contiguous segment that we will attempt to allocate when removing +a device. This can be no larger than 16MB. If there is a performance +problem with attempting to allocate large blocks, consider decreasing this. +.sp +Default value: \fB16,777,216\fR (16MB). +.RE + .sp .ne 2 .na @@ -2419,9 +2471,21 @@ Default value: \fB25\fR. \fBzfs_sync_pass_dont_compress\fR (int) .ad .RS 12n -Don't compress starting in this pass +Starting in this sync pass, we disable compression (including of metadata). +With the default setting, in practice, we don't have this many sync passes, +so this has no effect. .sp -Default value: \fB5\fR. +The original intent was that disabling compression would help the sync passes +to converge. However, in practice disabling compression increases the average +number of sync passes, because when we turn compression off, a lot of block's +size will change and thus we have to re-allocate (not overwrite) them. It +also increases the number of 128KB allocations (e.g. for indirect blocks and +spacemaps) because these will not be compressed. The 128K allocations are +especially detrimental to performance on highly fragmented systems, which may +have very few free segments of this size, and may need to load new metaslabs +to satisfy 128K allocations. +.sp +Default value: \fB8\fR. .RE .sp diff --git a/man/man8/zfs-mount-generator.8.in b/man/man8/zfs-mount-generator.8.in index 79720601d62a..48e4e2dfac29 100644 --- a/man/man8/zfs-mount-generator.8.in +++ b/man/man8/zfs-mount-generator.8.in @@ -26,7 +26,7 @@ information on ZFS mountpoints must be stored separately. The output of the command .PP .RS 4 -zfs list -H -o name,mountpoint,canmount,atime,relatime,devices,exec,readonly,setuid,nbmand +zfs list -H -o name,mountpoint,canmount,atime,relatime,devices,exec,readonly,setuid,nbmand,encroot,keylocation .RE .PP for datasets that should be mounted by systemd, should be kept diff --git a/module/Makefile.in b/module/Makefile.in index 935bd2663062..7477dbe56509 100644 --- a/module/Makefile.in +++ b/module/Makefile.in @@ -1,11 +1,11 @@ -subdir-m += avl -subdir-m += icp -subdir-m += lua -subdir-m += nvpair -subdir-m += spl -subdir-m += unicode -subdir-m += zcommon -subdir-m += zfs +obj-m += avl/ +obj-m += icp/ +obj-m += lua/ +obj-m += nvpair/ +obj-m += spl/ +obj-m += unicode/ +obj-m += zcommon/ +obj-m += zfs/ INSTALL_MOD_DIR ?= extra @@ -60,14 +60,15 @@ modules_install: modules_uninstall: @# Uninstall the kernel modules kmoddir=$(DESTDIR)$(INSTALL_MOD_PATH)/lib/modules/@LINUX_VERSION@ - list='$(subdir-m)'; for subdir in $$list; do \ - $(RM) -R $$kmoddir/$(INSTALL_MOD_DIR)/$$subdir; \ + list='$(obj-m)'; for objdir in $$list; do \ + $(RM) -R $$kmoddir/$(INSTALL_MOD_DIR)/$$objdir; \ done distdir: - list='$(subdir-m)'; for subdir in $$list; do \ - (cd @top_srcdir@/module && find $$subdir -name '*.c' -o -name '*.h' -o -name '*.S' |\ - xargs cp --parents -t $$distdir); \ + list='$(obj-m)'; for objdir in $$list; do \ + (cd @top_srcdir@/module && find $$objdir \ + -name '*.c' -o -name '*.h' -o -name '*.S' | \ + xargs cp --parents -t @abs_top_builddir@/module/$$distdir); \ done distclean maintainer-clean: clean diff --git a/module/icp/algs/aes/aes_impl.c b/module/icp/algs/aes/aes_impl.c index e15050635741..36e0686a51c2 100644 --- a/module/icp/algs/aes/aes_impl.c +++ b/module/icp/algs/aes/aes_impl.c @@ -303,16 +303,21 @@ aes_impl_init(void) } aes_supp_impl_cnt = c; - /* set fastest implementation. assume hardware accelerated is fastest */ + /* + * Set the fastest implementation given the assumption that the + * hardware accelerated version is the fastest. + */ #if defined(__x86_64) #if defined(HAVE_AES) - if (aes_aesni_impl.is_supported()) + if (aes_aesni_impl.is_supported()) { memcpy(&aes_fastest_impl, &aes_aesni_impl, sizeof (aes_fastest_impl)); - else + } else #endif + { memcpy(&aes_fastest_impl, &aes_x86_64_impl, sizeof (aes_fastest_impl)); + } #else memcpy(&aes_fastest_impl, &aes_generic_impl, sizeof (aes_fastest_impl)); diff --git a/module/icp/algs/modes/gcm.c b/module/icp/algs/modes/gcm.c index 13bceef0f170..0afd957f0cf9 100644 --- a/module/icp/algs/modes/gcm.c +++ b/module/icp/algs/modes/gcm.c @@ -646,7 +646,7 @@ const gcm_impl_ops_t *gcm_all_impl[] = { /* Indicate that benchmark has been completed */ static boolean_t gcm_impl_initialized = B_FALSE; -/* Select aes implementation */ +/* Select GCM implementation */ #define IMPL_FASTEST (UINT32_MAX) #define IMPL_CYCLE (UINT32_MAX-1) @@ -713,13 +713,15 @@ gcm_impl_init(void) /* set fastest implementation. assume hardware accelerated is fastest */ #if defined(__x86_64) && defined(HAVE_PCLMULQDQ) - if (gcm_pclmulqdq_impl.is_supported()) + if (gcm_pclmulqdq_impl.is_supported()) { memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl, sizeof (gcm_fastest_impl)); - else + } else #endif + { memcpy(&gcm_fastest_impl, &gcm_generic_impl, sizeof (gcm_fastest_impl)); + } strcpy(gcm_fastest_impl.name, "fastest"); @@ -742,7 +744,7 @@ static const struct { * If we are called before init(), user preference will be saved in * user_sel_impl, and applied in later init() call. This occurs when module * parameter is specified on module load. Otherwise, directly update - * icp_aes_impl. + * icp_gcm_impl. * * @val Name of gcm implementation to use * @param Unused. diff --git a/module/icp/include/aes/aes_impl.h b/module/icp/include/aes/aes_impl.h index 95cfddf9e0a4..3a3de91cf6a5 100644 --- a/module/icp/include/aes/aes_impl.h +++ b/module/icp/include/aes/aes_impl.h @@ -162,7 +162,7 @@ typedef enum aes_mech_type { #endif /* _AES_IMPL */ /* - * Methods used to define aes implementation + * Methods used to define AES implementation * * @aes_gen_f Key generation * @aes_enc_f Function encrypts one block diff --git a/module/icp/include/modes/gcm_impl.h b/module/icp/include/modes/gcm_impl.h index cbb904c059b7..b78cc8aab010 100644 --- a/module/icp/include/modes/gcm_impl.h +++ b/module/icp/include/modes/gcm_impl.h @@ -37,12 +37,12 @@ extern "C" { #include /* - * Methods used to define gcm implementation + * Methods used to define GCM implementation * * @gcm_mul_f Perform carry-less multiplication * @gcm_will_work_f Function tests whether implementation will function */ -typedef void (*gcm_mul_f)(uint64_t *, uint64_t *, uint64_t *); +typedef void (*gcm_mul_f)(uint64_t *, uint64_t *, uint64_t *); typedef boolean_t (*gcm_will_work_f)(void); #define GCM_IMPL_NAME_MAX (16) diff --git a/module/lua/ldo.c b/module/lua/ldo.c index aca02b234770..59d0b6a2c298 100644 --- a/module/lua/ldo.c +++ b/module/lua/ldo.c @@ -61,7 +61,7 @@ #elif defined(__mips__) #define JMP_BUF_CNT 12 #elif defined(__s390x__) -#define JMP_BUF_CNT 9 +#define JMP_BUF_CNT 18 #else #define JMP_BUF_CNT 1 #endif diff --git a/module/lua/llex.c b/module/lua/llex.c index 8760155d0546..50c301f599f1 100644 --- a/module/lua/llex.c +++ b/module/lua/llex.c @@ -431,9 +431,12 @@ static int llex (LexState *ls, SemInfo *seminfo) { if (sep >= 0) { read_long_string(ls, seminfo, sep); return TK_STRING; - } - else if (sep == -1) return '['; - else lexerror(ls, "invalid long string delimiter", TK_STRING); + } else if (sep == -1) { + return '['; + } else { + lexerror(ls, "invalid long string delimiter", TK_STRING); + break; + } } case '=': { next(ls); diff --git a/module/spl/Makefile.in b/module/spl/Makefile.in index 3bcbf63cbc63..e16666aa94f3 100644 --- a/module/spl/Makefile.in +++ b/module/spl/Makefile.in @@ -16,10 +16,8 @@ $(MODULE)-objs += spl-kmem.o $(MODULE)-objs += spl-kmem-cache.o $(MODULE)-objs += spl-kobj.o $(MODULE)-objs += spl-kstat.o -$(MODULE)-objs += spl-mutex.o $(MODULE)-objs += spl-proc.o $(MODULE)-objs += spl-procfs-list.o -$(MODULE)-objs += spl-rwlock.o $(MODULE)-objs += spl-taskq.o $(MODULE)-objs += spl-thread.o $(MODULE)-objs += spl-tsd.o diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c index 1e6e38b7874b..a7a9d1db9a98 100644 --- a/module/spl/spl-condvar.c +++ b/module/spl/spl-condvar.c @@ -154,26 +154,39 @@ EXPORT_SYMBOL(__cv_wait_sig); #if defined(HAVE_IO_SCHEDULE_TIMEOUT) #define spl_io_schedule_timeout(t) io_schedule_timeout(t) #else + +struct spl_task_timer { + struct timer_list timer; + struct task_struct *task; +}; + static void -__cv_wakeup(unsigned long data) +__cv_wakeup(spl_timer_list_t t) { - wake_up_process((struct task_struct *)data); + struct timer_list *tmr = (struct timer_list *)t; + struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer); + + wake_up_process(task_timer->task); } static long spl_io_schedule_timeout(long time_left) { long expire_time = jiffies + time_left; - struct timer_list timer; + struct spl_task_timer task_timer; + struct timer_list *timer = &task_timer.timer; + + task_timer.task = current; - init_timer(&timer); - setup_timer(&timer, __cv_wakeup, (unsigned long)current); - timer.expires = expire_time; - add_timer(&timer); + timer_setup(timer, __cv_wakeup, 0); + + timer->expires = expire_time; + add_timer(timer); io_schedule(); - del_timer_sync(&timer); + del_timer_sync(timer); + time_left = expire_time - jiffies; return (time_left < 0 ? 0 : time_left); diff --git a/module/spl/spl-generic.c b/module/spl/spl-generic.c index cd2fa2020510..3c5ef60bd1a4 100644 --- a/module/spl/spl-generic.c +++ b/module/spl/spl-generic.c @@ -694,51 +694,41 @@ spl_init(void) if ((rc = spl_kvmem_init())) goto out1; - if ((rc = spl_mutex_init())) - goto out2; - - if ((rc = spl_rw_init())) - goto out3; - if ((rc = spl_tsd_init())) - goto out4; + goto out2; if ((rc = spl_taskq_init())) - goto out5; + goto out3; if ((rc = spl_kmem_cache_init())) - goto out6; + goto out4; if ((rc = spl_vn_init())) - goto out7; + goto out5; if ((rc = spl_proc_init())) - goto out8; + goto out6; if ((rc = spl_kstat_init())) - goto out9; + goto out7; if ((rc = spl_zlib_init())) - goto out10; + goto out8; return (rc); -out10: - spl_kstat_fini(); -out9: - spl_proc_fini(); out8: - spl_vn_fini(); + spl_kstat_fini(); out7: - spl_kmem_cache_fini(); + spl_proc_fini(); out6: - spl_taskq_fini(); + spl_vn_fini(); out5: - spl_tsd_fini(); + spl_kmem_cache_fini(); out4: - spl_rw_fini(); + spl_taskq_fini(); out3: - spl_mutex_fini(); + spl_tsd_fini(); out2: spl_kvmem_fini(); out1: @@ -755,8 +745,6 @@ spl_fini(void) spl_kmem_cache_fini(); spl_taskq_fini(); spl_tsd_fini(); - spl_rw_fini(); - spl_mutex_fini(); spl_kvmem_fini(); } diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c index 1fdb61e6fce1..824b5e89f507 100644 --- a/module/spl/spl-kmem.c +++ b/module/spl/spl-kmem.c @@ -180,7 +180,8 @@ spl_kmem_alloc_impl(size_t size, int flags, int node) */ if ((size > spl_kmem_alloc_max) || use_vmem) { if (flags & KM_VMEM) { - ptr = __vmalloc(size, lflags, PAGE_KERNEL); + ptr = __vmalloc(size, lflags | __GFP_HIGHMEM, + PAGE_KERNEL); } else { return (NULL); } diff --git a/module/spl/spl-mutex.c b/module/spl/spl-mutex.c deleted file mode 100644 index ba818862b679..000000000000 --- a/module/spl/spl-mutex.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. - * Copyright (C) 2007 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Brian Behlendorf . - * UCRL-CODE-235197 - * - * This file is part of the SPL, Solaris Porting Layer. - * For details, see . - * - * The SPL is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * The SPL is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with the SPL. If not, see . - * - * Solaris Porting Layer (SPL) Mutex Implementation. - */ - -#include - -int spl_mutex_init(void) { return 0; } -void spl_mutex_fini(void) { } diff --git a/module/spl/spl-rwlock.c b/module/spl/spl-rwlock.c deleted file mode 100644 index 86727ed1957c..000000000000 --- a/module/spl/spl-rwlock.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. - * Copyright (C) 2007 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Brian Behlendorf . - * UCRL-CODE-235197 - * - * This file is part of the SPL, Solaris Porting Layer. - * For details, see . - * - * The SPL is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * The SPL is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with the SPL. If not, see . - * - * Solaris Porting Layer (SPL) Reader/Writer Lock Implementation. - */ - -#include -#include - -#if defined(CONFIG_PREEMPT_RT_FULL) - -#include -#define RT_MUTEX_OWNER_MASKALL 1UL - -static int -__rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ -#if defined(READER_BIAS) && defined(WRITER_BIAS) - /* - * After the 4.9.20-rt16 kernel the realtime patch series lifted the - * single reader restriction. While this could be accommodated by - * adding additional compatibility code assume the rwsem can never - * be upgraded. All caller must already cleanly handle this case. - */ - return (0); -#else - ASSERT((struct task_struct *) - ((unsigned long)rwsem->lock.owner & ~RT_MUTEX_OWNER_MASKALL) == - current); - - /* - * Prior to 4.9.20-rt16 kernel the realtime patch series, rwsem is - * implemented as a single mutex held by readers and writers alike. - * However, this implementation would prevent a thread from taking - * a read lock twice, as the mutex would already be locked on - * the second attempt. Therefore the implementation allows a - * single thread to take a rwsem as read lock multiple times - * tracking that nesting as read_depth counter. - */ - if (rwsem->read_depth <= 1) { - /* - * In case, the current thread has not taken the lock - * more than once as read lock, we can allow an - * upgrade to a write lock. rwsem_rt.h implements - * write locks as read_depth == 0. - */ - rwsem->read_depth = 0; - return (1); - } - return (0); -#endif -} -#elif defined(CONFIG_RWSEM_GENERIC_SPINLOCK) -static int -__rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ - int ret = 0; - unsigned long flags; - spl_rwsem_lock_irqsave(&rwsem->wait_lock, flags); - if (RWSEM_COUNT(rwsem) == SPL_RWSEM_SINGLE_READER_VALUE && - list_empty(&rwsem->wait_list)) { - ret = 1; - RWSEM_COUNT(rwsem) = SPL_RWSEM_SINGLE_WRITER_VALUE; - } - spl_rwsem_unlock_irqrestore(&rwsem->wait_lock, flags); - return (ret); -} -#elif defined(RWSEM_ACTIVE_MASK) -#if defined(HAVE_RWSEM_ATOMIC_LONG_COUNT) -static int -__rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ - long val; - val = atomic_long_cmpxchg(&rwsem->count, SPL_RWSEM_SINGLE_READER_VALUE, - SPL_RWSEM_SINGLE_WRITER_VALUE); - return (val == SPL_RWSEM_SINGLE_READER_VALUE); -} -#else -static int -__rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ - typeof(rwsem->count) val; - val = cmpxchg(&rwsem->count, SPL_RWSEM_SINGLE_READER_VALUE, - SPL_RWSEM_SINGLE_WRITER_VALUE); - return (val == SPL_RWSEM_SINGLE_READER_VALUE); -} -#endif -#else -static int -__rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ - return (0); -} -#endif - -int -rwsem_tryupgrade(struct rw_semaphore *rwsem) -{ - if (__rwsem_tryupgrade(rwsem)) { - rwsem_release(&rwsem->dep_map, 1, _RET_IP_); - rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER - rwsem->owner = current; -#endif - return (1); - } - return (0); -} -EXPORT_SYMBOL(rwsem_tryupgrade); - -int spl_rw_init(void) { return 0; } -void spl_rw_fini(void) { } diff --git a/module/spl/spl-taskq.c b/module/spl/spl-taskq.c index 7684257be7ad..a39f94e4cc20 100644 --- a/module/spl/spl-taskq.c +++ b/module/spl/spl-taskq.c @@ -24,6 +24,7 @@ * Solaris Porting Layer (SPL) Task Queue Implementation. */ +#include #include #include #include @@ -242,20 +243,13 @@ task_expire_impl(taskq_ent_t *t) wake_up(&tq->tq_work_waitq); } -#ifdef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST static void -task_expire(struct timer_list *tl) +task_expire(spl_timer_list_t tl) { - taskq_ent_t *t = from_timer(t, tl, tqent_timer); + struct timer_list *tmr = (struct timer_list *)tl; + taskq_ent_t *t = from_timer(t, tmr, tqent_timer); task_expire_impl(t); } -#else -static void -task_expire(unsigned long data) -{ - task_expire_impl((taskq_ent_t *)data); -} -#endif /* * Returns the lowest incomplete taskqid_t. The taskqid_t may @@ -597,9 +591,6 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags) t->tqent_func = func; t->tqent_arg = arg; t->tqent_taskq = tq; -#ifndef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST - t->tqent_timer.data = 0; -#endif t->tqent_timer.function = NULL; t->tqent_timer.expires = 0; t->tqent_birth = jiffies; @@ -649,9 +640,6 @@ taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg, t->tqent_func = func; t->tqent_arg = arg; t->tqent_taskq = tq; -#ifndef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST - t->tqent_timer.data = (unsigned long)t; -#endif t->tqent_timer.function = task_expire; t->tqent_timer.expires = (unsigned long)expire_time; add_timer(&t->tqent_timer); @@ -744,11 +732,7 @@ taskq_init_ent(taskq_ent_t *t) { spin_lock_init(&t->tqent_lock); init_waitqueue_head(&t->tqent_waitq); -#ifdef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST timer_setup(&t->tqent_timer, NULL, 0); -#else - init_timer(&t->tqent_timer); -#endif INIT_LIST_HEAD(&t->tqent_list); t->tqent_id = 0; t->tqent_func = NULL; diff --git a/module/spl/spl-thread.c b/module/spl/spl-thread.c index 34d26e8068ca..326c801e2908 100644 --- a/module/spl/spl-thread.c +++ b/module/spl/spl-thread.c @@ -153,8 +153,9 @@ spl_kthread_create(int (*func)(void *), void *data, const char namefmt[], ...) if (PTR_ERR(tsk) == -ENOMEM) continue; return (NULL); - } else + } else { return (tsk); + } } while (1); } EXPORT_SYMBOL(spl_kthread_create); diff --git a/module/spl/spl-vnode.c b/module/spl/spl-vnode.c index 11b5e4e5a2f2..d9056c964e5a 100644 --- a/module/spl/spl-vnode.c +++ b/module/spl/spl-vnode.c @@ -641,64 +641,6 @@ vn_areleasef(int fd, uf_info_t *fip) } /* releasef() */ EXPORT_SYMBOL(areleasef); - -static void -vn_set_fs_pwd(struct fs_struct *fs, struct path *path) -{ - struct path old_pwd; - -#ifdef HAVE_FS_STRUCT_SPINLOCK - spin_lock(&fs->lock); - old_pwd = fs->pwd; - fs->pwd = *path; - path_get(path); - spin_unlock(&fs->lock); -#else - write_lock(&fs->lock); - old_pwd = fs->pwd; - fs->pwd = *path; - path_get(path); - write_unlock(&fs->lock); -#endif /* HAVE_FS_STRUCT_SPINLOCK */ - - if (old_pwd.dentry) - path_put(&old_pwd); -} - -int -vn_set_pwd(const char *filename) -{ - struct path path; - mm_segment_t saved_fs; - int rc; - - /* - * user_path_dir() and __user_walk() both expect 'filename' to be - * a user space address so we must briefly increase the data segment - * size to ensure strncpy_from_user() does not fail with -EFAULT. - */ - saved_fs = get_fs(); - set_fs(KERNEL_DS); - - rc = user_path_dir(filename, &path); - if (rc) - goto out; - - rc = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); - if (rc) - goto dput_and_out; - - vn_set_fs_pwd(current->fs, &path); - -dput_and_out: - path_put(&path); -out: - set_fs(saved_fs); - - return (-rc); -} /* vn_set_pwd() */ -EXPORT_SYMBOL(vn_set_pwd); - static int vn_cache_constructor(void *buf, void *cdrarg, int kmflags) { diff --git a/module/zcommon/zfs_fletcher.c b/module/zcommon/zfs_fletcher.c index 5a991ba6073a..f712ce40c6ea 100644 --- a/module/zcommon/zfs_fletcher.c +++ b/module/zcommon/zfs_fletcher.c @@ -592,8 +592,9 @@ fletcher_4_incremental_byteswap(void *buf, size_t size, void *data) } #if defined(_KERNEL) -/* Fletcher 4 kstats */ - +/* + * Fletcher 4 kstats + */ static int fletcher_4_kstat_headers(char *buf, size_t size) { @@ -669,7 +670,6 @@ fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size) zio_cksum_t zc; uint32_t i, l, sel_save = IMPL_READ(fletcher_4_impl_chosen); - fletcher_checksum_func_t *fletcher_4_test = native ? fletcher_4_native : fletcher_4_byteswap; diff --git a/module/zcommon/zfs_namecheck.c b/module/zcommon/zfs_namecheck.c index 58b23b0e00b0..b1e0de6d8181 100644 --- a/module/zcommon/zfs_namecheck.c +++ b/module/zcommon/zfs_namecheck.c @@ -232,6 +232,27 @@ entity_namecheck(const char *path, namecheck_err_t *why, char *what) } } + if (*end == '\0' || *end == '/') { + int component_length = end - start; + /* Validate the contents of this component is not '.' */ + if (component_length == 1) { + if (start[0] == '.') { + if (why) + *why = NAME_ERR_SELF_REF; + return (-1); + } + } + + /* Validate the content of this component is not '..' */ + if (component_length == 2) { + if (start[0] == '.' && start[1] == '.') { + if (why) + *why = NAME_ERR_PARENT_REF; + return (-1); + } + } + } + /* Snapshot or bookmark delimiter found */ if (*end == '@' || *end == '#') { /* Multiple delimiters are not allowed */ diff --git a/module/zfs/abd.c b/module/zfs/abd.c index 9041bd8b1841..32b2c842c0df 100644 --- a/module/zfs/abd.c +++ b/module/zfs/abd.c @@ -1370,8 +1370,10 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, switch (parity) { case 3: len = MIN(caiters[2].iter_mapsize, len); + /* falls through */ case 2: len = MIN(caiters[1].iter_mapsize, len); + /* falls through */ case 1: len = MIN(caiters[0].iter_mapsize, len); } @@ -1461,9 +1463,11 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds, case 3: len = MIN(xiters[2].iter_mapsize, len); len = MIN(citers[2].iter_mapsize, len); + /* falls through */ case 2: len = MIN(xiters[1].iter_mapsize, len); len = MIN(citers[1].iter_mapsize, len); + /* falls through */ case 1: len = MIN(xiters[0].iter_mapsize, len); len = MIN(citers[0].iter_mapsize, len); diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 3dfa6ca202d1..53a44bdaf44c 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2018, Joyent, Inc. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. All rights reserved. */ @@ -1872,7 +1872,8 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf) * There were no decompressed bufs, so there should not be a * checksum on the hdr either. */ - EQUIV(!copied, hdr->b_l1hdr.b_freeze_cksum == NULL); + if (zfs_flags & ZFS_DEBUG_MODIFY) + EQUIV(!copied, hdr->b_l1hdr.b_freeze_cksum == NULL); return (copied); } @@ -2253,7 +2254,6 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb, */ if (arc_buf_try_copy_decompressed_data(buf)) { /* Skip byteswapping and checksumming (already done) */ - ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, !=, NULL); return (0); } else { error = zio_decompress_data(HDR_GET_COMPRESS(hdr), @@ -4801,8 +4801,6 @@ arc_reduce_target_size(int64_t to_free) if (c > to_free && c - to_free > arc_c_min) { arc_c = c - to_free; atomic_add_64(&arc_p, -(arc_p >> arc_shrink_shift)); - if (asize < arc_c) - arc_c = MAX(asize, arc_c_min); if (arc_p > arc_c) arc_p = (arc_c >> 1); ASSERT(arc_c >= arc_c_min); @@ -5081,6 +5079,9 @@ arc_kmem_reap_soon(void) static boolean_t arc_adjust_cb_check(void *arg, zthr_t *zthr) { + if (!arc_initialized) + return (B_FALSE); + /* * This is necessary so that any changes which may have been made to * many of the zfs_arc_* module parameters will be propagated to @@ -5168,6 +5169,9 @@ arc_adjust_cb(void *arg, zthr_t *zthr) static boolean_t arc_reap_cb_check(void *arg, zthr_t *zthr) { + if (!arc_initialized) + return (B_FALSE); + int64_t free_memory = arc_available_memory(); /* @@ -5608,7 +5612,7 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag) * If we are growing the cache, and we are adding anonymous * data, and we have outgrown arc_p, update arc_p */ - if (aggsum_compare(&arc_size, arc_c) < 0 && + if (aggsum_upper_bound(&arc_size) < arc_c && hdr->b_l1hdr.b_state == arc_anon && (zfs_refcount_count(&arc_anon->arcs_size) + zfs_refcount_count(&arc_mru->arcs_size) > arc_p)) @@ -7926,11 +7930,9 @@ arc_fini(void) list_destroy(&arc_prune_list); mutex_destroy(&arc_prune_mtx); - (void) zthr_cancel(arc_adjust_zthr); - zthr_destroy(arc_adjust_zthr); + (void) zthr_cancel(arc_adjust_zthr); (void) zthr_cancel(arc_reap_zthr); - zthr_destroy(arc_reap_zthr); mutex_destroy(&arc_adjust_lock); cv_destroy(&arc_adjust_waiters_cv); @@ -7943,6 +7945,14 @@ arc_fini(void) buf_fini(); arc_state_fini(); + /* + * We destroy the zthrs after all the ARC state has been + * torn down to avoid the case of them receiving any + * wakeup() signals after they are destroyed. + */ + zthr_destroy(arc_adjust_zthr); + zthr_destroy(arc_reap_zthr); + ASSERT0(arc_loaned_bytes); } @@ -8760,7 +8770,7 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize, /* * If this data simply needs its own buffer, we simply allocate it - * and copy the data. This may be done to elimiate a depedency on a + * and copy the data. This may be done to eliminate a dependency on a * shared buffer or to reallocate the buffer to match asize. */ if (HDR_HAS_RABD(hdr) && asize != psize) { diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 19d0d07d6587..22c5867b6acc 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -2593,7 +2593,8 @@ dbuf_destroy(dmu_buf_impl_t *db) if (db->db_blkid != DMU_BONUS_BLKID) { boolean_t needlock = !MUTEX_HELD(&dn->dn_dbufs_mtx); if (needlock) - mutex_enter(&dn->dn_dbufs_mtx); + mutex_enter_nested(&dn->dn_dbufs_mtx, + NESTED_SINGLE); avl_remove(&dn->dn_dbufs, db); atomic_dec_32(&dn->dn_dbufs_count); membar_producer(); diff --git a/module/zfs/ddt_zap.c b/module/zfs/ddt_zap.c index 77c0784cca0b..3489d31d9c9e 100644 --- a/module/zfs/ddt_zap.c +++ b/module/zfs/ddt_zap.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018 by Delphix. All rights reserved. */ #include @@ -117,7 +118,18 @@ ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk) zap_attribute_t za; int error; - zap_cursor_init_serialized(&zc, os, object, *walk); + if (*walk == 0) { + /* + * We don't want to prefetch the entire ZAP object, because + * it can be enormous. Also the primary use of DDT iteration + * is for scrubbing, in which case we will be issuing many + * scrub I/Os for each ZAP block that we read in, so + * reading the ZAP is unlikely to be the bottleneck. + */ + zap_cursor_init_noprefetch(&zc, os, object); + } else { + zap_cursor_init_serialized(&zc, os, object, *walk); + } if ((error = zap_cursor_retrieve(&zc, &za)) == 0) { uchar_t cbuf[sizeof (dde->dde_phys) + 1]; uint64_t csize = za.za_num_integers; diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 22a38b03d116..c83029111506 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -81,6 +81,13 @@ int zfs_dmu_offset_next_sync = 0; */ int zfs_object_remap_one_indirect_delay_ms = 0; +/* + * Limit the amount we can prefetch with one call to this amount. This + * helps to limit the amount of memory that can be used by prefetching. + * Larger objects should be prefetched a bit at a time. + */ +int dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE; + const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { {DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "unallocated" }, {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "object directory" }, @@ -667,6 +674,11 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, return; } + /* + * See comment before the definition of dmu_prefetch_max. + */ + len = MIN(len, dmu_prefetch_max); + /* * XXX - Note, if the dnode for the requested object is not * already cached, we will do a *synchronous* read in the @@ -2641,6 +2653,10 @@ module_param(zfs_dmu_offset_next_sync, int, 0644); MODULE_PARM_DESC(zfs_dmu_offset_next_sync, "Enable forcing txg sync to find holes"); +module_param(dmu_prefetch_max, int, 0644); +MODULE_PARM_DESC(dmu_prefetch_max, + "Limit one prefetch call to this size"); + /* END CSTYLED */ #endif diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index e8181d73feb9..53613cfd5659 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -1348,13 +1348,6 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx) return (SET_ERROR(EINVAL)); } - error = dmu_objset_clone_crypt_check(pdd, origin->ds_dir); - if (error != 0) { - dsl_dataset_rele(origin, FTAG); - dsl_dir_rele(pdd, FTAG); - return (error); - } - dsl_dataset_rele(origin, FTAG); dsl_dir_rele(pdd, FTAG); @@ -1706,6 +1699,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) zio_t *zio; list_t *list; dbuf_dirty_record_t *dr; + int num_sublists; + multilist_t *ml; blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP); *blkptr_copy = *os->os_rootbp; @@ -1794,10 +1789,13 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) } } - for (int i = 0; - i < multilist_get_num_sublists(os->os_dirty_dnodes[txgoff]); i++) { + ml = os->os_dirty_dnodes[txgoff]; + num_sublists = multilist_get_num_sublists(ml); + for (int i = 0; i < num_sublists; i++) { + if (multilist_sublist_is_empty_idx(ml, i)) + continue; sync_dnodes_arg_t *sda = kmem_alloc(sizeof (*sda), KM_SLEEP); - sda->sda_list = os->os_dirty_dnodes[txgoff]; + sda->sda_list = ml; sda->sda_sublist_idx = i; sda->sda_tx = tx; (void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq, @@ -2109,6 +2107,8 @@ userquota_updates_task(void *arg) void dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) { + int num_sublists; + if (!dmu_objset_userused_enabled(os)) return; @@ -2141,8 +2141,10 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); } - for (int i = 0; - i < multilist_get_num_sublists(os->os_synced_dnodes); i++) { + num_sublists = multilist_get_num_sublists(os->os_synced_dnodes); + for (int i = 0; i < num_sublists; i++) { + if (multilist_sublist_is_empty_idx(os->os_synced_dnodes, i)) + continue; userquota_updates_arg_t *uua = kmem_alloc(sizeof (*uua), KM_SLEEP); uua->uua_os = os; diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index e85c18c4394d..1d7b558cb91a 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -114,21 +114,25 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, boolean_t raw = (featureflags & DMU_BACKUP_FEATURE_RAW) != 0; boolean_t embed = (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) != 0; - /* temporary clone name must not exist */ + /* Temporary clone name must not exist. */ error = zap_lookup(dp->dp_meta_objset, dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, recv_clone_name, 8, 1, &val); if (error != ENOENT) return (error == 0 ? EBUSY : error); - /* new snapshot name must not exist */ + /* Resume state must not be set. */ + if (dsl_dataset_has_resume_receive_state(ds)) + return (SET_ERROR(EBUSY)); + + /* New snapshot name must not exist. */ error = zap_lookup(dp->dp_meta_objset, dsl_dataset_phys(ds)->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap, 8, 1, &val); if (error != ENOENT) return (error == 0 ? EEXIST : error); - /* must not have children if receiving a ZVOL */ + /* Must not have children if receiving a ZVOL. */ error = zap_count(dp->dp_meta_objset, dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, &children); if (error != 0) @@ -355,7 +359,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) /* Open the parent of tofs */ ASSERT3U(strlen(tofs), <, sizeof (buf)); (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); - error = dsl_dataset_hold_flags(dp, buf, dsflags, FTAG, &ds); + error = dsl_dataset_hold(dp, buf, FTAG, &ds); if (error != 0) return (error); @@ -373,13 +377,13 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) error = dmu_objset_create_crypt_check(ds->ds_dir, drba->drba_dcp, &will_encrypt); if (error != 0) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } if (will_encrypt && (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } } @@ -392,25 +396,25 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL, drba->drba_cred); if (error != 0) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_SNAPSHOT_LIMIT, NULL, drba->drba_cred); if (error != 0) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } /* can't recv below anything but filesystems (eg. no ZVOLs) */ error = dmu_objset_from_ds(ds, &os); if (error != 0) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } if (dmu_objset_type(os) != DMU_OST_ZFS) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ZFS_ERR_WRONG_PARENT)); } @@ -420,31 +424,31 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) error = dsl_dataset_hold_flags(dp, drba->drba_origin, dsflags, FTAG, &origin); if (error != 0) { - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (error); } if (!origin->ds_is_snapshot) { dsl_dataset_rele_flags(origin, dsflags, FTAG); - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } if (dsl_dataset_phys(origin)->ds_guid != fromguid && fromguid != 0) { dsl_dataset_rele_flags(origin, dsflags, FTAG); - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENODEV)); } if (origin->ds_dir->dd_crypto_obj != 0 && (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)) { dsl_dataset_rele_flags(origin, dsflags, FTAG); - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } dsl_dataset_rele_flags(origin, dsflags, FTAG); } - dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_dataset_rele(ds, FTAG); error = 0; } return (error); diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index d0b2366bd3f8..3da4744bac9b 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -938,6 +938,25 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how) txh = list_next(&tx->tx_holds, txh)) { dnode_t *dn = txh->txh_dnode; if (dn != NULL) { + /* + * This thread can't hold the dn_struct_rwlock + * while assigning the tx, because this can lead to + * deadlock. Specifically, if this dnode is already + * assigned to an earlier txg, this thread may need + * to wait for that txg to sync (the ERESTART case + * below). The other thread that has assigned this + * dnode to an earlier txg prevents this txg from + * syncing until its tx can complete (calling + * dmu_tx_commit()), but it may need to acquire the + * dn_struct_rwlock to do so (e.g. via + * dmu_buf_hold*()). + * + * Note that this thread can't hold the lock for + * read either, but the rwlock doesn't record + * enough information to make that assertion. + */ + ASSERT(!RW_WRITE_HELD(&dn->dn_struct_rwlock)); + mutex_enter(&dn->dn_mtx); if (dn->dn_assigned_txg == tx->tx_txg - 1) { mutex_exit(&dn->dn_mtx); @@ -1352,7 +1371,10 @@ dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow) object = sa_handle_object(hdl); - dmu_tx_hold_bonus(tx, object); + dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; + DB_DNODE_ENTER(db); + dmu_tx_hold_bonus_by_dnode(tx, DB_DNODE(db)); + DB_DNODE_EXIT(db); if (tx->tx_objset->os_sa->sa_master_obj == 0) return; @@ -1374,7 +1396,6 @@ dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow) ASSERT(tx->tx_txg == 0); dmu_tx_hold_spill(tx, object); } else { - dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; dnode_t *dn; DB_DNODE_ENTER(db); diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index c06f614e1993..cc7bc5ec82c8 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -55,7 +55,6 @@ dnode_stats_t dnode_stats = { { "dnode_hold_free_lock_retry", KSTAT_DATA_UINT64 }, { "dnode_hold_free_overflow", KSTAT_DATA_UINT64 }, { "dnode_hold_free_refcount", KSTAT_DATA_UINT64 }, - { "dnode_hold_free_txg", KSTAT_DATA_UINT64 }, { "dnode_free_interior_lock_retry", KSTAT_DATA_UINT64 }, { "dnode_allocate", KSTAT_DATA_UINT64 }, { "dnode_reallocate", KSTAT_DATA_UINT64 }, @@ -1255,6 +1254,10 @@ dnode_buf_evict_async(void *dbu) * as an extra dnode slot by an large dnode, in which case it returns * ENOENT. * + * If the DNODE_DRY_RUN flag is set, we don't actually hold the dnode, just + * return whether the hold would succeed or not. tag and dnp should set to + * NULL in this case. + * * errors: * EINVAL - Invalid object number or flags. * ENOSPC - Hole too small to fulfill "slots" request (DNODE_MUST_BE_FREE) @@ -1283,6 +1286,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, ASSERT(!(flag & DNODE_MUST_BE_ALLOCATED) || (slots == 0)); ASSERT(!(flag & DNODE_MUST_BE_FREE) || (slots > 0)); + IMPLY(flag & DNODE_DRY_RUN, (tag == NULL) && (dnp == NULL)); /* * If you are holding the spa config lock as writer, you shouldn't @@ -1312,8 +1316,11 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE) return (SET_ERROR(EEXIST)); DNODE_VERIFY(dn); - (void) zfs_refcount_add(&dn->dn_holds, tag); - *dnp = dn; + /* Don't actually hold if dry run, just return 0 */ + if (!(flag & DNODE_DRY_RUN)) { + (void) zfs_refcount_add(&dn->dn_holds, tag); + *dnp = dn; + } return (0); } @@ -1455,6 +1462,14 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, return (SET_ERROR(ENOENT)); } + /* Don't actually hold if dry run, just return 0 */ + if (flag & DNODE_DRY_RUN) { + mutex_exit(&dn->dn_mtx); + dnode_slots_rele(dnc, idx, slots); + dbuf_rele(db, FTAG); + return (0); + } + DNODE_STAT_BUMP(dnode_hold_alloc_hits); } else if (flag & DNODE_MUST_BE_FREE) { @@ -1512,6 +1527,14 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, return (SET_ERROR(EEXIST)); } + /* Don't actually hold if dry run, just return 0 */ + if (flag & DNODE_DRY_RUN) { + mutex_exit(&dn->dn_mtx); + dnode_slots_rele(dnc, idx, slots); + dbuf_rele(db, FTAG); + return (0); + } + dnode_set_slots(dnc, idx + 1, slots - 1, DN_SLOT_INTERIOR); DNODE_STAT_BUMP(dnode_hold_free_hits); } else { @@ -1519,15 +1542,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, return (SET_ERROR(EINVAL)); } - if (dn->dn_free_txg) { - DNODE_STAT_BUMP(dnode_hold_free_txg); - type = dn->dn_type; - mutex_exit(&dn->dn_mtx); - dnode_slots_rele(dnc, idx, slots); - dbuf_rele(db, FTAG); - return (SET_ERROR((flag & DNODE_MUST_BE_ALLOCATED) ? - ENOENT : EEXIST)); - } + ASSERT0(dn->dn_free_txg); if (zfs_refcount_add(&dn->dn_holds, tag) == 1) dbuf_add_ref(db, dnh); @@ -1618,6 +1633,16 @@ dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting) } } +/* + * Test whether we can create a dnode at the specified location. + */ +int +dnode_try_claim(objset_t *os, uint64_t object, int slots) +{ + return (dnode_hold_impl(os, object, DNODE_MUST_BE_FREE | DNODE_DRY_RUN, + slots, NULL, NULL)); +} + void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) { @@ -2483,3 +2508,13 @@ dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, return (error); } + +#if defined(_KERNEL) +EXPORT_SYMBOL(dnode_hold); +EXPORT_SYMBOL(dnode_rele); +EXPORT_SYMBOL(dnode_set_nlevels); +EXPORT_SYMBOL(dnode_set_blksz); +EXPORT_SYMBOL(dnode_free_range); +EXPORT_SYMBOL(dnode_evict_dbufs); +EXPORT_SYMBOL(dnode_evict_bonus); +#endif diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c index 21db8e51ffd0..24711227ba55 100644 --- a/module/zfs/dsl_crypt.c +++ b/module/zfs/dsl_crypt.c @@ -1610,15 +1610,8 @@ dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent) int ret; uint64_t curr_rddobj, parent_rddobj; - if (dd->dd_crypto_obj == 0) { - /* children of encrypted parents must be encrypted */ - if (newparent->dd_crypto_obj != 0) { - ret = SET_ERROR(EACCES); - goto error; - } - + if (dd->dd_crypto_obj == 0) return (0); - } ret = dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj); if (ret != 0) @@ -1683,11 +1676,15 @@ dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin) * Check that the parent of the target has the same encryption root. */ ret = dsl_dir_get_encryption_root_ddobj(origin->dd_parent, &op_rddobj); - if (ret != 0) + if (ret == ENOENT) + return (SET_ERROR(EACCES)); + else if (ret != 0) return (ret); ret = dsl_dir_get_encryption_root_ddobj(target->dd_parent, &tp_rddobj); - if (ret != 0) + if (ret == ENOENT) + return (SET_ERROR(EACCES)); + else if (ret != 0) return (ret); if (op_rddobj != tp_rddobj) @@ -1747,34 +1744,6 @@ dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, kmem_free(keylocation, ZAP_MAXVALUELEN); } -int -dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd) -{ - int ret; - uint64_t pcrypt, crypt; - - /* - * Check that we are not making an unencrypted child of an - * encrypted parent. - */ - ret = dsl_dir_get_crypt(parentdd, &pcrypt); - if (ret != 0) - return (ret); - - ret = dsl_dir_get_crypt(origindd, &crypt); - if (ret != 0) - return (ret); - - ASSERT3U(pcrypt, !=, ZIO_CRYPT_INHERIT); - ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT); - - if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) - return (SET_ERROR(EINVAL)); - - return (0); -} - - int dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_crypto_params_t *dcp, boolean_t *will_encrypt) @@ -1805,13 +1774,6 @@ dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_crypto_params_t *dcp, ASSERT3U(pcrypt, !=, ZIO_CRYPT_INHERIT); ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT); - /* - * We can't create an unencrypted child of an encrypted parent - * under any circumstances. - */ - if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) - return (SET_ERROR(EINVAL)); - /* check for valid dcp with no encryption (inherited or local) */ if (crypt == ZIO_CRYPT_OFF) { /* Must not specify encryption params */ @@ -2662,11 +2624,13 @@ dsl_dataset_crypt_stats(dsl_dataset_t *ds, nvlist_t *nv) } if (dsl_dir_get_encryption_root_ddobj(dd, &intval) == 0) { - VERIFY0(dsl_dir_hold_obj(dd->dd_pool, intval, NULL, FTAG, - &enc_root)); - dsl_dir_name(enc_root, buf); - dsl_dir_rele(enc_root, FTAG); - dsl_prop_nvlist_add_string(nv, ZFS_PROP_ENCRYPTION_ROOT, buf); + if (dsl_dir_hold_obj(dd->dd_pool, intval, NULL, FTAG, + &enc_root) == 0) { + dsl_dir_name(enc_root, buf); + dsl_dir_rele(enc_root, FTAG); + dsl_prop_nvlist_add_string(nv, + ZFS_PROP_ENCRYPTION_ROOT, buf); + } } } diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index 3b3aceb77da6..8b8f2b4a402a 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -1058,9 +1058,10 @@ dsl_destroy_head(const char *name) /* * Head deletion is processed in one txg on old pools; * remove the objects from open context so that the txg sync - * is not too long. + * is not too long. This optimization can only work for + * encrypted datasets if the wrapping key is loaded. */ - error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_FALSE, + error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_TRUE, FTAG, &os); if (error == 0) { uint64_t prev_snap_txg = @@ -1072,7 +1073,7 @@ dsl_destroy_head(const char *name) (void) dmu_free_long_object(os, obj); /* sync out all frees */ txg_wait_synced(dmu_objset_pool(os), 0); - dmu_objset_disown(os, B_FALSE, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); } } diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index b6d3f133b80c..d7a6b2ae1654 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -23,7 +23,7 @@ * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright 2016 Gary Mills * Copyright (c) 2017 Datto Inc. - * Copyright 2017 Joyent, Inc. + * Copyright 2019 Joyent, Inc. */ #include @@ -952,13 +952,16 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) * will find the drives that need to be resilvered * when the machine reboots and start the resilver then. */ - boolean_t resilver_needed = - dsl_scan_clear_deferred(spa->spa_root_vdev, tx); - if (resilver_needed) { - spa_history_log_internal(spa, - "starting deferred resilver", tx, - "errors=%llu", spa_get_errlog_size(spa)); - spa_async_request(spa, SPA_ASYNC_RESILVER); + if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) { + boolean_t resilver_needed = + dsl_scan_clear_deferred(spa->spa_root_vdev, tx); + if (resilver_needed) { + spa_history_log_internal(spa, + "starting deferred resilver", tx, + "errors=%llu", + (u_longlong_t)spa_get_errlog_size(spa)); + spa_async_request(spa, SPA_ASYNC_RESILVER); + } } } @@ -2173,16 +2176,17 @@ ds_clone_swapped_bookmark(dsl_dataset_t *ds1, dsl_dataset_t *ds2, } /* - * Called when a parent dataset and its clone are swapped. If we were + * Called when an origin dataset and its clone are swapped. If we were * currently traversing the dataset, we need to switch to traversing the - * newly promoted parent. + * newly promoted clone. */ void dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx) { dsl_pool_t *dp = ds1->ds_dir->dd_pool; dsl_scan_t *scn = dp->dp_scan; - uint64_t mintxg; + uint64_t mintxg1, mintxg2; + boolean_t ds1_queued, ds2_queued; if (!dsl_scan_is_running(scn)) return; @@ -2190,44 +2194,81 @@ dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx) ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys.scn_bookmark); ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys_cached.scn_bookmark); - if (scan_ds_queue_contains(scn, ds1->ds_object, &mintxg)) { - scan_ds_queue_remove(scn, ds1->ds_object); - scan_ds_queue_insert(scn, ds2->ds_object, mintxg); + /* + * Handle the in-memory scan queue. + */ + ds1_queued = scan_ds_queue_contains(scn, ds1->ds_object, &mintxg1); + ds2_queued = scan_ds_queue_contains(scn, ds2->ds_object, &mintxg2); + + /* Sanity checking. */ + if (ds1_queued) { + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); + } + if (ds2_queued) { + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); } - if (scan_ds_queue_contains(scn, ds2->ds_object, &mintxg)) { + + if (ds1_queued && ds2_queued) { + /* + * If both are queued, we don't need to do anything. + * The swapping code below would not handle this case correctly, + * since we can't insert ds2 if it is already there. That's + * because scan_ds_queue_insert() prohibits a duplicate insert + * and panics. + */ + } else if (ds1_queued) { + scan_ds_queue_remove(scn, ds1->ds_object); + scan_ds_queue_insert(scn, ds2->ds_object, mintxg1); + } else if (ds2_queued) { scan_ds_queue_remove(scn, ds2->ds_object); - scan_ds_queue_insert(scn, ds1->ds_object, mintxg); + scan_ds_queue_insert(scn, ds1->ds_object, mintxg2); } - if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, - ds1->ds_object, &mintxg) == 0) { - int err; - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); - VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + /* + * Handle the on-disk scan queue. + * The on-disk state is an out-of-date version of the in-memory state, + * so the in-memory and on-disk values for ds1_queued and ds2_queued may + * be different. Therefore we need to apply the swap logic to the + * on-disk state independently of the in-memory state. + */ + ds1_queued = zap_lookup_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds1->ds_object, &mintxg1) == 0; + ds2_queued = zap_lookup_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds2->ds_object, &mintxg2) == 0; + + /* Sanity checking. */ + if (ds1_queued) { + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); + } + if (ds2_queued) { + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); + } + + if (ds1_queued && ds2_queued) { + /* + * If both are queued, we don't need to do anything. + * Alternatively, we could check for EEXIST from + * zap_add_int_key() and back out to the original state, but + * that would be more work than checking for this case upfront. + */ + } else if (ds1_queued) { + VERIFY3S(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds1->ds_object, tx)); - err = zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg, tx); - VERIFY(err == 0 || err == EEXIST); - if (err == EEXIST) { - /* Both were there to begin with */ - VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, - ds1->ds_object, mintxg, tx)); - } + VERIFY3S(0, ==, zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg1, tx)); zfs_dbgmsg("clone_swap ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds1->ds_object, (u_longlong_t)ds2->ds_object); - } - if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, - ds2->ds_object, &mintxg) == 0) { - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); - VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + } else if (ds2_queued) { + VERIFY3S(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds2->ds_object, tx)); - VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg, tx)); + VERIFY3S(0, ==, zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg2, tx)); zfs_dbgmsg("clone_swap ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds2->ds_object, diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index d62bee3caf42..ad0fb32e9ee8 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2017, Intel Corporation. */ @@ -103,12 +103,27 @@ int zfs_mg_noalloc_threshold = 0; /* * Metaslab groups are considered eligible for allocations if their - * fragmenation metric (measured as a percentage) is less than or equal to - * zfs_mg_fragmentation_threshold. If a metaslab group exceeds this threshold - * then it will be skipped unless all metaslab groups within the metaslab - * class have also crossed this threshold. + * fragmenation metric (measured as a percentage) is less than or + * equal to zfs_mg_fragmentation_threshold. If a metaslab group + * exceeds this threshold then it will be skipped unless all metaslab + * groups within the metaslab class have also crossed this threshold. + * + * This tunable was introduced to avoid edge cases where we continue + * allocating from very fragmented disks in our pool while other, less + * fragmented disks, exists. On the other hand, if all disks in the + * pool are uniformly approaching the threshold, the threshold can + * be a speed bump in performance, where we keep switching the disks + * that we allocate from (e.g. we allocate some segments from disk A + * making it bypassing the threshold while freeing segments from disk + * B getting its fragmentation below the threshold). + * + * Empirically, we've seen that our vdev selection for allocations is + * good enough that fragmentation increases uniformly across all vdevs + * the majority of the time. Thus we set the threshold percentage high + * enough to avoid hitting the speed bump on pools that are being pushed + * to the edge. */ -int zfs_mg_fragmentation_threshold = 85; +int zfs_mg_fragmentation_threshold = 95; /* * Allow metaslabs to keep their active state as long as their fragmentation @@ -2980,6 +2995,30 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) dmu_tx_commit(tx); } +void +metaslab_potentially_unload(metaslab_t *msp, uint64_t txg) +{ + /* + * If the metaslab is loaded and we've not tried to load or allocate + * from it in 'metaslab_unload_delay' txgs, then unload it. + */ + if (msp->ms_loaded && + msp->ms_disabled == 0 && + msp->ms_selected_txg + metaslab_unload_delay < txg) { + for (int t = 1; t < TXG_CONCURRENT_STATES; t++) { + VERIFY0(range_tree_space( + msp->ms_allocating[(txg + t) & TXG_MASK])); + } + if (msp->ms_allocator != -1) { + metaslab_passivate(msp, msp->ms_weight & + ~METASLAB_ACTIVE_MASK); + } + + if (!metaslab_debug_unload) + metaslab_unload(msp); + } +} + /* * Called after a transaction group has completely synced to mark * all of the metaslab's free space as usable. @@ -3117,27 +3156,6 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) */ metaslab_recalculate_weight_and_sort(msp); - /* - * If the metaslab is loaded and we've not tried to load or allocate - * from it in 'metaslab_unload_delay' txgs, then unload it. - */ - if (msp->ms_loaded && - msp->ms_disabled == 0 && - msp->ms_selected_txg + metaslab_unload_delay < txg) { - - for (int t = 1; t < TXG_CONCURRENT_STATES; t++) { - VERIFY0(range_tree_space( - msp->ms_allocating[(txg + t) & TXG_MASK])); - } - if (msp->ms_allocator != -1) { - metaslab_passivate(msp, msp->ms_weight & - ~METASLAB_ACTIVE_MASK); - } - - if (!metaslab_debug_unload) - metaslab_unload(msp); - } - ASSERT0(range_tree_space(msp->ms_allocating[txg & TXG_MASK])); ASSERT0(range_tree_space(msp->ms_freeing)); ASSERT0(range_tree_space(msp->ms_freed)); diff --git a/module/zfs/multilist.c b/module/zfs/multilist.c index 2a594c56cbd5..b74ee0f0670a 100644 --- a/module/zfs/multilist.c +++ b/module/zfs/multilist.c @@ -363,6 +363,28 @@ multilist_sublist_remove(multilist_sublist_t *mls, void *obj) list_remove(&mls->mls_list, obj); } +int +multilist_sublist_is_empty(multilist_sublist_t *mls) +{ + ASSERT(MUTEX_HELD(&mls->mls_lock)); + return (list_is_empty(&mls->mls_list)); +} + +int +multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx) +{ + multilist_sublist_t *mls; + int empty; + + ASSERT3U(sublist_idx, <, ml->ml_num_sublists); + mls = &ml->ml_sublists[sublist_idx]; + ASSERT(!MUTEX_HELD(&mls->mls_lock)); + mutex_enter(&mls->mls_lock); + empty = list_is_empty(&mls->mls_list); + mutex_exit(&mls->mls_lock); + return (empty); +} + void * multilist_sublist_head(multilist_sublist_t *mls) { diff --git a/module/zfs/pathname.c b/module/zfs/pathname.c index e3e97c9bb365..4766762f37d1 100644 --- a/module/zfs/pathname.c +++ b/module/zfs/pathname.c @@ -71,9 +71,12 @@ pn_alloc(struct pathname *pnp) void pn_alloc_sz(struct pathname *pnp, size_t sz) { - pnp->pn_path = pnp->pn_buf = kmem_alloc(sz, KM_SLEEP); - pnp->pn_pathlen = 0; + pnp->pn_buf = kmem_alloc(sz, KM_SLEEP); pnp->pn_bufsize = sz; +#if 0 /* unused in ZoL */ + pnp->pn_path = pnp->pn_buf; + pnp->pn_pathlen = 0; +#endif } /* @@ -84,6 +87,10 @@ pn_free(struct pathname *pnp) { /* pn_bufsize is usually MAXPATHLEN, but may not be */ kmem_free(pnp->pn_buf, pnp->pn_bufsize); - pnp->pn_path = pnp->pn_buf = NULL; - pnp->pn_pathlen = pnp->pn_bufsize = 0; + pnp->pn_buf = NULL; + pnp->pn_bufsize = 0; +#if 0 /* unused in ZoL */ + pnp->pn_path = NULL; + pnp->pn_pathlen = 0; +#endif } diff --git a/module/zfs/policy.c b/module/zfs/policy.c index 55c932747915..a723235d3015 100644 --- a/module/zfs/policy.c +++ b/module/zfs/policy.c @@ -209,7 +209,7 @@ secpolicy_vnode_setdac(const cred_t *cr, uid_t owner) int secpolicy_vnode_setid_retain(const cred_t *cr, boolean_t issuidroot) { - return (0); + return (priv_policy_user(cr, CAP_FSETID, B_FALSE, EPERM)); } /* diff --git a/module/zfs/qat.c b/module/zfs/qat.c index a6f024cb44d7..08613b3a2042 100644 --- a/module/zfs/qat.c +++ b/module/zfs/qat.c @@ -21,7 +21,7 @@ #if defined(_KERNEL) && defined(HAVE_QAT) #include -#include "qat.h" +#include qat_stats_t qat_stats = { { "comp_requests", KSTAT_DATA_UINT64 }, diff --git a/module/zfs/qat.h b/module/zfs/qat.h index 9014c03148ba..5c1cd15d09d6 100644 --- a/module/zfs/qat.h +++ b/module/zfs/qat.h @@ -40,11 +40,6 @@ typedef enum qat_encrypt_dir { #include "dc/cpa_dc.h" #include "lac/cpa_cy_sym.h" -/* - * Timeout - no response from hardware after 0.5 seconds - */ -#define QAT_TIMEOUT_MS 500 - /* * The minimal and maximal buffer size which are not restricted * in the QAT hardware, but with the input buffer size between 4KB diff --git a/module/zfs/qat_compress.c b/module/zfs/qat_compress.c index 1c5c0a4e7256..46ccb997a3b7 100644 --- a/module/zfs/qat_compress.c +++ b/module/zfs/qat_compress.c @@ -28,7 +28,7 @@ #include #include #include -#include "qat.h" +#include /* * Max instances in a QAT device, each instance is a channel to submit @@ -404,11 +404,7 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, } /* we now wait until the completion of the operation. */ - if (!wait_for_completion_interruptible_timeout(&complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } + wait_for_completion(&complete); if (dc_results.status != CPA_STATUS_SUCCESS) { status = CPA_STATUS_FAIL; @@ -463,11 +459,7 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, } /* we now wait until the completion of the operation. */ - if (!wait_for_completion_interruptible_timeout(&complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } + wait_for_completion(&complete); if (dc_results.status != CPA_STATUS_SUCCESS) { status = CPA_STATUS_FAIL; @@ -547,7 +539,7 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len, } static int -param_set_qat_compress(const char *val, struct kernel_param *kp) +param_set_qat_compress(const char *val, zfs_kernel_param_t *kp) { int ret; int *pvalue = kp->arg; diff --git a/module/zfs/qat_crypt.c b/module/zfs/qat_crypt.c index 34c19b5823a8..1e77f143e3ec 100644 --- a/module/zfs/qat_crypt.c +++ b/module/zfs/qat_crypt.c @@ -36,7 +36,7 @@ #include #include "lac/cpa_cy_im.h" #include "lac/cpa_cy_common.h" -#include "qat.h" +#include /* * Max instances in a QAT device, each instance is a channel to submit @@ -415,6 +415,9 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf, op_data.messageLenToCipherInBytes = enc_len; op_data.ivLenInBytes = ZIO_DATA_IV_LEN; bcopy(iv_buf, op_data.pIv, ZIO_DATA_IV_LEN); + /* if dir is QAT_DECRYPT, copy digest_buf to pDigestResult */ + if (dir == QAT_DECRYPT) + bcopy(digest_buf, op_data.pDigestResult, ZIO_DATA_MAC_LEN); cb.verify_result = CPA_FALSE; init_completion(&cb.complete); @@ -423,23 +426,21 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf, if (status != CPA_STATUS_SUCCESS) goto fail; - if (!wait_for_completion_interruptible_timeout(&cb.complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } + /* we now wait until the completion of the operation. */ + wait_for_completion(&cb.complete); if (cb.verify_result == CPA_FALSE) { status = CPA_STATUS_FAIL; goto fail; } - /* save digest result to digest_buf */ - bcopy(op_data.pDigestResult, digest_buf, ZIO_DATA_MAC_LEN); - if (dir == QAT_ENCRYPT) + if (dir == QAT_ENCRYPT) { + /* if dir is QAT_ENCRYPT, save pDigestResult to digest_buf */ + bcopy(op_data.pDigestResult, digest_buf, ZIO_DATA_MAC_LEN); QAT_STAT_INCR(encrypt_total_out_bytes, enc_len); - else + } else { QAT_STAT_INCR(decrypt_total_out_bytes, enc_len); + } fail: if (status != CPA_STATUS_SUCCESS) @@ -549,11 +550,9 @@ qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size, zio_cksum_t *zcp) if (status != CPA_STATUS_SUCCESS) goto fail; - if (!wait_for_completion_interruptible_timeout(&cb.complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } + /* we now wait until the completion of the operation. */ + wait_for_completion(&cb.complete); + if (cb.verify_result == CPA_FALSE) { status = CPA_STATUS_FAIL; goto fail; @@ -578,7 +577,7 @@ qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size, zio_cksum_t *zcp) } static int -param_set_qat_encrypt(const char *val, struct kernel_param *kp) +param_set_qat_encrypt(const char *val, zfs_kernel_param_t *kp) { int ret; int *pvalue = kp->arg; @@ -600,7 +599,7 @@ param_set_qat_encrypt(const char *val, struct kernel_param *kp) } static int -param_set_qat_checksum(const char *val, struct kernel_param *kp) +param_set_qat_checksum(const char *val, zfs_kernel_param_t *kp) { int ret; int *pvalue = kp->arg; diff --git a/module/zfs/sa.c b/module/zfs/sa.c index 56a606962a7f..4999fef345dc 100644 --- a/module/zfs/sa.c +++ b/module/zfs/sa.c @@ -1380,7 +1380,7 @@ sa_handle_destroy(sa_handle_t *hdl) dmu_buf_rele(hdl->sa_bonus, NULL); if (hdl->sa_spill) - dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL); + dmu_buf_rele(hdl->sa_spill, NULL); mutex_exit(&hdl->sa_lock); kmem_cache_free(sa_cache, hdl); @@ -2028,7 +2028,7 @@ sa_bulk_update_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, hdl->sa_spill_tab = NULL; } - dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL); + dmu_buf_rele(hdl->sa_spill, NULL); hdl->sa_spill = NULL; } @@ -2131,13 +2131,13 @@ sa_remove(sa_handle_t *hdl, sa_attr_type_t attr, dmu_tx_t *tx) void sa_object_info(sa_handle_t *hdl, dmu_object_info_t *doi) { - dmu_object_info_from_db((dmu_buf_t *)hdl->sa_bonus, doi); + dmu_object_info_from_db(hdl->sa_bonus, doi); } void sa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks) { - dmu_object_size_from_db((dmu_buf_t *)hdl->sa_bonus, + dmu_object_size_from_db(hdl->sa_bonus, blksize, nblocks); } @@ -2150,7 +2150,7 @@ sa_set_userp(sa_handle_t *hdl, void *ptr) dmu_buf_t * sa_get_db(sa_handle_t *hdl) { - return ((dmu_buf_t *)hdl->sa_bonus); + return (hdl->sa_bonus); } void * diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 49d7c24879dc..9bec2e8d2b36 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -567,8 +567,13 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) if (!error && intval > 1) error = SET_ERROR(EINVAL); - if (!error && !spa_get_hostid()) - error = SET_ERROR(ENOTSUP); + if (!error) { + uint32_t hostid = zone_get_hostid(NULL); + if (hostid) + spa->spa_hostid = hostid; + else + error = SET_ERROR(ENOTSUP); + } break; @@ -2540,7 +2545,7 @@ spa_activity_check_required(spa_t *spa, uberblock_t *ub, nvlist_t *label, if (nvlist_exists(label, ZPOOL_CONFIG_HOSTID)) hostid = fnvlist_lookup_uint64(label, ZPOOL_CONFIG_HOSTID); - if (hostid == spa_get_hostid()) + if (hostid == spa_get_hostid(spa)) return (B_FALSE); /* @@ -3059,7 +3064,7 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type) spa->spa_config); if (activity_check) { if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay && - spa_get_hostid() == 0) { + spa_get_hostid(spa) == 0) { nvlist_free(label); fnvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID); @@ -3739,7 +3744,7 @@ spa_ld_load_vdev_metadata(spa_t *spa) * be imported when the system hostid is zero. The exception to * this rule is zdb which is always allowed to access pools. */ - if (spa_multihost(spa) && spa_get_hostid() == 0 && + if (spa_multihost(spa) && spa_get_hostid(spa) == 0 && (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) == 0) { fnvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID); @@ -5968,8 +5973,8 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, spa_write_cachefile(spa, B_TRUE, B_TRUE); spa_remove(spa); } - mutex_exit(&spa_namespace_lock); + mutex_exit(&spa_namespace_lock); return (0); fail: diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index dae4a4867989..1f03747e7c70 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -93,8 +93,7 @@ spa_config_load(void) */ pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); - (void) snprintf(pathname, MAXPATHLEN, "%s%s", - (rootdir != NULL) ? "./" : "", spa_config_path); + (void) snprintf(pathname, MAXPATHLEN, "%s", spa_config_path); file = kobj_open_file(pathname); @@ -458,7 +457,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) fnvlist_add_string(config, ZPOOL_CONFIG_COMMENT, spa->spa_comment); - hostid = spa_get_hostid(); + hostid = spa_get_hostid(spa); if (hostid != 0) fnvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, hostid); fnvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, utsname()->nodename); diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 1455cc5fa1a6..18df04b342b0 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -677,6 +677,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) spa->spa_proc = &p0; spa->spa_proc_state = SPA_PROC_NONE; spa->spa_trust_config = B_TRUE; + spa->spa_hostid = zone_get_hostid(NULL); spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms); spa->spa_deadman_ziotime = MSEC2NSEC(zfs_deadman_ziotime_ms); @@ -2626,22 +2627,10 @@ spa_multihost(spa_t *spa) return (spa->spa_multihost ? B_TRUE : B_FALSE); } -unsigned long -spa_get_hostid(void) +uint32_t +spa_get_hostid(spa_t *spa) { - unsigned long myhostid; - -#ifdef _KERNEL - myhostid = zone_get_hostid(NULL); -#else /* _KERNEL */ - /* - * We're emulating the system's hostid in userland, so - * we can't use zone_get_hostid(). - */ - (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); -#endif /* _KERNEL */ - - return (myhostid); + return (spa->spa_hostid); } boolean_t diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 9509c925c135..503d4f42ee36 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -3262,6 +3262,20 @@ vdev_sync_done(vdev_t *vd, uint64_t txg) != NULL) metaslab_sync_done(msp, txg); + /* + * Because this function is only called on dirty vdevs, it's possible + * we won't consider all metaslabs for unloading on every + * txg. However, unless the system is largely idle it is likely that + * we will dirty all vdevs within a few txgs. + */ + for (int i = 0; i < vd->vdev_ms_count; i++) { + msp = vd->vdev_ms[i]; + mutex_enter(&msp->ms_lock); + if (msp->ms_sm != NULL) + metaslab_potentially_unload(msp, txg); + mutex_exit(&msp->ms_lock); + } + if (reassess) metaslab_sync_reassess(vd->vdev_mg); } diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 1419ae6ad54a..46437f21fb78 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -220,7 +220,7 @@ vdev_elevator_switch(vdev_t *v, char *elevator) char *envp[] = { NULL }; argv[2] = kmem_asprintf(SET_SCHEDULER_CMD, device, elevator); - error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); + error = call_usermodehelper(argv[0], argv, envp, UMH_NO_WAIT); strfree(argv[2]); #endif /* HAVE_ELEVATOR_CHANGE */ if (error) { @@ -935,19 +935,19 @@ param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp) } vdev_ops_t vdev_disk_ops = { - vdev_disk_open, - vdev_disk_close, - vdev_default_asize, - vdev_disk_io_start, - vdev_disk_io_done, - NULL, - NULL, - vdev_disk_hold, - vdev_disk_rele, - NULL, - vdev_default_xlate, - VDEV_TYPE_DISK, /* name of this vdev type */ - B_TRUE /* leaf vdev */ + .vdev_op_open = vdev_disk_open, + .vdev_op_close = vdev_disk_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_disk_io_start, + .vdev_op_io_done = vdev_disk_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = vdev_disk_hold, + .vdev_op_rele = vdev_disk_rele, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */ + .vdev_op_leaf = B_TRUE /* leaf vdev */ }; module_param_call(zfs_vdev_scheduler, param_set_vdev_scheduler, diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c index c155057852a3..b79017f3a610 100644 --- a/module/zfs/vdev_file.c +++ b/module/zfs/vdev_file.c @@ -277,19 +277,19 @@ vdev_file_io_done(zio_t *zio) } vdev_ops_t vdev_file_ops = { - vdev_file_open, - vdev_file_close, - vdev_default_asize, - vdev_file_io_start, - vdev_file_io_done, - NULL, - NULL, - vdev_file_hold, - vdev_file_rele, - NULL, - vdev_default_xlate, - VDEV_TYPE_FILE, /* name of this vdev type */ - B_TRUE /* leaf vdev */ + .vdev_op_open = vdev_file_open, + .vdev_op_close = vdev_file_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_file_io_start, + .vdev_op_io_done = vdev_file_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = vdev_file_hold, + .vdev_op_rele = vdev_file_rele, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_FILE, /* name of this vdev type */ + .vdev_op_leaf = B_TRUE /* leaf vdev */ }; void @@ -313,19 +313,19 @@ vdev_file_fini(void) #ifndef _KERNEL vdev_ops_t vdev_disk_ops = { - vdev_file_open, - vdev_file_close, - vdev_default_asize, - vdev_file_io_start, - vdev_file_io_done, - NULL, - NULL, - vdev_file_hold, - vdev_file_rele, - NULL, - vdev_default_xlate, - VDEV_TYPE_DISK, /* name of this vdev type */ - B_TRUE /* leaf vdev */ + .vdev_op_open = vdev_file_open, + .vdev_op_close = vdev_file_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_file_io_start, + .vdev_op_io_done = vdev_file_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = vdev_file_hold, + .vdev_op_rele = vdev_file_rele, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */ + .vdev_op_leaf = B_TRUE /* leaf vdev */ }; #endif diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c index d9e08cbb6414..48f9b7efa11e 100644 --- a/module/zfs/vdev_indirect.c +++ b/module/zfs/vdev_indirect.c @@ -1847,19 +1847,19 @@ vdev_indirect_io_done(zio_t *zio) } vdev_ops_t vdev_indirect_ops = { - vdev_indirect_open, - vdev_indirect_close, - vdev_default_asize, - vdev_indirect_io_start, - vdev_indirect_io_done, - NULL, - NULL, - NULL, - NULL, - vdev_indirect_remap, - NULL, - VDEV_TYPE_INDIRECT, /* name of this vdev type */ - B_FALSE /* leaf vdev */ + .vdev_op_open = vdev_indirect_open, + .vdev_op_close = vdev_indirect_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_indirect_io_start, + .vdev_op_io_done = vdev_indirect_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = vdev_indirect_remap, + .vdev_op_xlate = NULL, + .vdev_op_type = VDEV_TYPE_INDIRECT, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* leaf vdev */ }; #if defined(_KERNEL) diff --git a/module/zfs/vdev_indirect_births.c b/module/zfs/vdev_indirect_births.c index 1c44a64287d3..99b83c392257 100644 --- a/module/zfs/vdev_indirect_births.c +++ b/module/zfs/vdev_indirect_births.c @@ -70,7 +70,7 @@ vdev_indirect_births_close(vdev_indirect_births_t *vib) if (vib->vib_phys->vib_count > 0) { uint64_t births_size = vdev_indirect_births_size_impl(vib); - kmem_free(vib->vib_entries, births_size); + vmem_free(vib->vib_entries, births_size); vib->vib_entries = NULL; } @@ -108,7 +108,7 @@ vdev_indirect_births_open(objset_t *os, uint64_t births_object) if (vib->vib_phys->vib_count > 0) { uint64_t births_size = vdev_indirect_births_size_impl(vib); - vib->vib_entries = kmem_alloc(births_size, KM_SLEEP); + vib->vib_entries = vmem_alloc(births_size, KM_SLEEP); VERIFY0(dmu_read(vib->vib_objset, vib->vib_object, 0, births_size, vib->vib_entries, DMU_READ_PREFETCH)); } @@ -148,10 +148,10 @@ vdev_indirect_births_add_entry(vdev_indirect_births_t *vib, vib->vib_phys->vib_count++; new_size = vdev_indirect_births_size_impl(vib); - new_entries = kmem_alloc(new_size, KM_SLEEP); + new_entries = vmem_alloc(new_size, KM_SLEEP); if (old_size > 0) { bcopy(vib->vib_entries, new_entries, old_size); - kmem_free(vib->vib_entries, old_size); + vmem_free(vib->vib_entries, old_size); } new_entries[vib->vib_phys->vib_count - 1] = vibe; vib->vib_entries = new_entries; diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index bac9aab1af85..188af5151eac 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -21,8 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. - * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + * Copyright (c) 2012, 2019 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. */ @@ -613,7 +612,7 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, * zfs_remove_max_segment, so we need at least one entry * per zfs_remove_max_segment of allocated data. */ - seg_count += to_alloc / zfs_remove_max_segment; + seg_count += to_alloc / spa_remove_max_segment(spa); fnvlist_add_uint64(nv, ZPOOL_CONFIG_INDIRECT_SIZE, seg_count * diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 59cc2dcdd2ca..23ff75bfc96f 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -786,51 +786,51 @@ vdev_mirror_state_change(vdev_t *vd, int faulted, int degraded) } vdev_ops_t vdev_mirror_ops = { - vdev_mirror_open, - vdev_mirror_close, - vdev_default_asize, - vdev_mirror_io_start, - vdev_mirror_io_done, - vdev_mirror_state_change, - NULL, - NULL, - NULL, - NULL, - vdev_default_xlate, - VDEV_TYPE_MIRROR, /* name of this vdev type */ - B_FALSE /* not a leaf vdev */ + .vdev_op_open = vdev_mirror_open, + .vdev_op_close = vdev_mirror_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_mirror_io_start, + .vdev_op_io_done = vdev_mirror_io_done, + .vdev_op_state_change = vdev_mirror_state_change, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_MIRROR, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* not a leaf vdev */ }; vdev_ops_t vdev_replacing_ops = { - vdev_mirror_open, - vdev_mirror_close, - vdev_default_asize, - vdev_mirror_io_start, - vdev_mirror_io_done, - vdev_mirror_state_change, - NULL, - NULL, - NULL, - NULL, - vdev_default_xlate, - VDEV_TYPE_REPLACING, /* name of this vdev type */ - B_FALSE /* not a leaf vdev */ + .vdev_op_open = vdev_mirror_open, + .vdev_op_close = vdev_mirror_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_mirror_io_start, + .vdev_op_io_done = vdev_mirror_io_done, + .vdev_op_state_change = vdev_mirror_state_change, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_REPLACING, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* not a leaf vdev */ }; vdev_ops_t vdev_spare_ops = { - vdev_mirror_open, - vdev_mirror_close, - vdev_default_asize, - vdev_mirror_io_start, - vdev_mirror_io_done, - vdev_mirror_state_change, - NULL, - NULL, - NULL, - NULL, - vdev_default_xlate, - VDEV_TYPE_SPARE, /* name of this vdev type */ - B_FALSE /* not a leaf vdev */ + .vdev_op_open = vdev_mirror_open, + .vdev_op_close = vdev_mirror_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_mirror_io_start, + .vdev_op_io_done = vdev_mirror_io_done, + .vdev_op_state_change = vdev_mirror_state_change, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_default_xlate, + .vdev_op_type = VDEV_TYPE_SPARE, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* not a leaf vdev */ }; #if defined(_KERNEL) diff --git a/module/zfs/vdev_missing.c b/module/zfs/vdev_missing.c index d85993bff052..205b23eba7f5 100644 --- a/module/zfs/vdev_missing.c +++ b/module/zfs/vdev_missing.c @@ -80,33 +80,33 @@ vdev_missing_io_done(zio_t *zio) } vdev_ops_t vdev_missing_ops = { - vdev_missing_open, - vdev_missing_close, - vdev_default_asize, - vdev_missing_io_start, - vdev_missing_io_done, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - VDEV_TYPE_MISSING, /* name of this vdev type */ - B_TRUE /* leaf vdev */ + .vdev_op_open = vdev_missing_open, + .vdev_op_close = vdev_missing_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_missing_io_start, + .vdev_op_io_done = vdev_missing_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = NULL, + .vdev_op_type = VDEV_TYPE_MISSING, /* name of this vdev type */ + .vdev_op_leaf = B_TRUE /* leaf vdev */ }; vdev_ops_t vdev_hole_ops = { - vdev_missing_open, - vdev_missing_close, - vdev_default_asize, - vdev_missing_io_start, - vdev_missing_io_done, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - VDEV_TYPE_HOLE, /* name of this vdev type */ - B_TRUE /* leaf vdev */ + .vdev_op_open = vdev_missing_open, + .vdev_op_close = vdev_missing_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = vdev_missing_io_start, + .vdev_op_io_done = vdev_missing_io_done, + .vdev_op_state_change = NULL, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = NULL, + .vdev_op_type = VDEV_TYPE_HOLE, /* name of this vdev type */ + .vdev_op_leaf = B_TRUE /* leaf vdev */ }; diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c index e74df76b7530..86b20f134834 100644 --- a/module/zfs/vdev_queue.c +++ b/module/zfs/vdev_queue.c @@ -709,6 +709,18 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) do { dio = nio; nio = AVL_NEXT(t, dio); + zio_add_child(dio, aio); + vdev_queue_io_remove(vq, dio); + } while (dio != last); + + /* + * We need to drop the vdev queue's lock during zio_execute() to + * avoid a deadlock that we could encounter due to lock order + * reversal between vq_lock and io_lock in zio_change_priority(). + * Use the dropped lock to do memory copy without congestion. + */ + mutex_exit(&vq->vq_lock); + while ((dio = zio_walk_parents(aio, &zl)) != NULL) { ASSERT3U(dio->io_type, ==, aio->io_type); if (dio->io_flags & ZIO_FLAG_NODATA) { @@ -720,16 +732,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) dio->io_offset - aio->io_offset, 0, dio->io_size); } - zio_add_child(dio, aio); - vdev_queue_io_remove(vq, dio); - } while (dio != last); - - /* - * We need to drop the vdev queue's lock to avoid a deadlock that we - * could encounter since this I/O will complete immediately. - */ - mutex_exit(&vq->vq_lock); - while ((dio = zio_walk_parents(aio, &zl)) != NULL) { zio_vdev_io_bypass(dio); zio_execute(dio); } diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index 215cd1c12064..327b186713fa 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -2403,17 +2403,17 @@ vdev_raidz_xlate(vdev_t *cvd, const range_seg_t *in, range_seg_t *res) } vdev_ops_t vdev_raidz_ops = { - vdev_raidz_open, - vdev_raidz_close, - vdev_raidz_asize, - vdev_raidz_io_start, - vdev_raidz_io_done, - vdev_raidz_state_change, - vdev_raidz_need_resilver, - NULL, - NULL, - NULL, - vdev_raidz_xlate, - VDEV_TYPE_RAIDZ, /* name of this vdev type */ - B_FALSE /* not a leaf vdev */ + .vdev_op_open = vdev_raidz_open, + .vdev_op_close = vdev_raidz_close, + .vdev_op_asize = vdev_raidz_asize, + .vdev_op_io_start = vdev_raidz_io_start, + .vdev_op_io_done = vdev_raidz_io_done, + .vdev_op_state_change = vdev_raidz_state_change, + .vdev_op_need_resilver = vdev_raidz_need_resilver, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = vdev_raidz_xlate, + .vdev_op_type = VDEV_TYPE_RAIDZ, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* not a leaf vdev */ }; diff --git a/module/zfs/vdev_raidz_math.c b/module/zfs/vdev_raidz_math.c index e6112bc02137..3ef67768f916 100644 --- a/module/zfs/vdev_raidz_math.c +++ b/module/zfs/vdev_raidz_math.c @@ -472,7 +472,7 @@ vdev_raidz_math_init(void) return; #endif - /* Fake an zio and run the benchmark on a warmed up buffer */ + /* Fake a zio and run the benchmark on a warmed up buffer */ bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP); bench_zio->io_offset = 0; bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */ diff --git a/module/zfs/vdev_raidz_math_scalar.c b/module/zfs/vdev_raidz_math_scalar.c index a693bff63ffb..cd742e146ca6 100644 --- a/module/zfs/vdev_raidz_math_scalar.c +++ b/module/zfs/vdev_raidz_math_scalar.c @@ -142,6 +142,7 @@ static const struct { a.b[6] = mul_lt[a.b[6]]; \ a.b[5] = mul_lt[a.b[5]]; \ a.b[4] = mul_lt[a.b[4]]; \ + /* falls through */ \ case 4: \ a.b[3] = mul_lt[a.b[3]]; \ a.b[2] = mul_lt[a.b[2]]; \ diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c index c104c3fc8329..488fe69c3b3e 100644 --- a/module/zfs/vdev_removal.c +++ b/module/zfs/vdev_removal.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. */ #include @@ -100,6 +100,8 @@ int zfs_remove_max_copy_bytes = 64 * 1024 * 1024; * removing a device. This can be no larger than SPA_MAXBLOCKSIZE. If * there is a performance problem with attempting to allocate large blocks, * consider decreasing this. + * + * See also the accessor function spa_remove_max_segment(). */ int zfs_remove_max_segment = SPA_MAXBLOCKSIZE; @@ -951,8 +953,10 @@ spa_vdev_copy_segment(vdev_t *vd, range_tree_t *segs, vdev_indirect_mapping_entry_t *entry; dva_t dst = {{ 0 }}; uint64_t start = range_tree_min(segs); + ASSERT0(P2PHASE(start, 1 << spa->spa_min_ashift)); ASSERT3U(maxalloc, <=, SPA_MAXBLOCKSIZE); + ASSERT0(P2PHASE(maxalloc, 1 << spa->spa_min_ashift)); uint64_t size = range_tree_span(segs); if (range_tree_span(segs) > maxalloc) { @@ -983,6 +987,7 @@ spa_vdev_copy_segment(vdev_t *vd, range_tree_t *segs, } } ASSERT3U(size, <=, maxalloc); + ASSERT0(P2PHASE(size, 1 << spa->spa_min_ashift)); /* * An allocation class might not have any remaining vdevs or space @@ -1026,11 +1031,11 @@ spa_vdev_copy_segment(vdev_t *vd, range_tree_t *segs, /* * We can't have any padding of the allocated size, otherwise we will - * misunderstand what's allocated, and the size of the mapping. - * The caller ensures this will be true by passing in a size that is - * aligned to the worst (highest) ashift in the pool. + * misunderstand what's allocated, and the size of the mapping. We + * prevent padding by ensuring that all devices in the pool have the + * same ashift, and the allocation size is a multiple of the ashift. */ - ASSERT3U(DVA_GET_ASIZE(&dst), ==, size); + VERIFY3U(DVA_GET_ASIZE(&dst), ==, size); entry = kmem_zalloc(sizeof (vdev_indirect_mapping_entry_t), KM_SLEEP); DVA_MAPPING_SET_SRC_OFFSET(&entry->vime_mapping, start); @@ -1363,6 +1368,20 @@ spa_vdev_copy_impl(vdev_t *vd, spa_vdev_removal_t *svr, vdev_copy_arg_t *vca, range_tree_destroy(segs); } +/* + * The size of each removal mapping is limited by the tunable + * zfs_remove_max_segment, but we must adjust this to be a multiple of the + * pool's ashift, so that we don't try to split individual sectors regardless + * of the tunable value. (Note that device removal requires that all devices + * have the same ashift, so there's no difference between spa_min_ashift and + * spa_max_ashift.) The raw tunable should not be used elsewhere. + */ +uint64_t +spa_remove_max_segment(spa_t *spa) +{ + return (P2ROUNDUP(zfs_remove_max_segment, 1 << spa->spa_max_ashift)); +} + /* * The removal thread operates in open context. It iterates over all * allocated space in the vdev, by loading each metaslab's spacemap. @@ -1385,7 +1404,7 @@ spa_vdev_remove_thread(void *arg) spa_t *spa = arg; spa_vdev_removal_t *svr = spa->spa_vdev_removal; vdev_copy_arg_t vca; - uint64_t max_alloc = zfs_remove_max_segment; + uint64_t max_alloc = spa_remove_max_segment(spa); uint64_t last_txg = 0; spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); @@ -1507,6 +1526,7 @@ spa_vdev_remove_thread(void *arg) dmu_tx_abort(tx); goto done; } + uint64_t txg = dmu_tx_get_txg(tx); /* @@ -1518,7 +1538,7 @@ spa_vdev_remove_thread(void *arg) vd = vdev_lookup_top(spa, svr->svr_vdev_id); if (txg != last_txg) - max_alloc = zfs_remove_max_segment; + max_alloc = spa_remove_max_segment(spa); last_txg = txg; spa_vdev_copy_impl(vd, svr, &vca, &max_alloc, tx); diff --git a/module/zfs/vdev_root.c b/module/zfs/vdev_root.c index e40b7ce8e4e8..7170f7013608 100644 --- a/module/zfs/vdev_root.c +++ b/module/zfs/vdev_root.c @@ -140,17 +140,17 @@ vdev_root_state_change(vdev_t *vd, int faulted, int degraded) } vdev_ops_t vdev_root_ops = { - vdev_root_open, - vdev_root_close, - vdev_default_asize, - NULL, /* io_start - not applicable to the root */ - NULL, /* io_done - not applicable to the root */ - vdev_root_state_change, - NULL, - NULL, - NULL, - NULL, - NULL, - VDEV_TYPE_ROOT, /* name of this vdev type */ - B_FALSE /* not a leaf vdev */ + .vdev_op_open = vdev_root_open, + .vdev_op_close = vdev_root_close, + .vdev_op_asize = vdev_default_asize, + .vdev_op_io_start = NULL, /* not applicable to the root */ + .vdev_op_io_done = NULL, /* not applicable to the root */ + .vdev_op_state_change = vdev_root_state_change, + .vdev_op_need_resilver = NULL, + .vdev_op_hold = NULL, + .vdev_op_rele = NULL, + .vdev_op_remap = NULL, + .vdev_op_xlate = NULL, + .vdev_op_type = VDEV_TYPE_ROOT, /* name of this vdev type */ + .vdev_op_leaf = B_FALSE /* not a leaf vdev */ }; diff --git a/module/zfs/zap.c b/module/zfs/zap.c index 4222e2c9a653..85755e64c500 100644 --- a/module/zfs/zap.c +++ b/module/zfs/zap.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. */ @@ -49,6 +49,36 @@ #include #include +/* + * If zap_iterate_prefetch is set, we will prefetch the entire ZAP object + * (all leaf blocks) when we start iterating over it. + * + * For zap_cursor_init(), the callers all intend to iterate through all the + * entries. There are a few cases where an error (typically i/o error) could + * cause it to bail out early. + * + * For zap_cursor_init_serialized(), there are callers that do the iteration + * outside of ZFS. Typically they would iterate over everything, but we + * don't have control of that. E.g. zfs_ioc_snapshot_list_next(), + * zcp_snapshots_iter(), and other iterators over things in the MOS - these + * are called by /sbin/zfs and channel programs. The other example is + * zfs_readdir() which iterates over directory entries for the getdents() + * syscall. /sbin/ls iterates to the end (unless it receives a signal), but + * userland doesn't have to. + * + * Given that the ZAP entries aren't returned in a specific order, the only + * legitimate use cases for partial iteration would be: + * + * 1. Pagination: e.g. you only want to display 100 entries at a time, so you + * get the first 100 and then wait for the user to hit "next page", which + * they may never do). + * + * 2. You want to know if there are more than X entries, without relying on + * the zfs-specific implementation of the directory's st_size (which is + * the number of entries). + */ +int zap_iterate_prefetch = B_TRUE; + int fzap_default_block_shift = 14; /* 16k blocksize */ extern inline zap_phys_t *zap_f_phys(zap_t *zap); @@ -1198,6 +1228,21 @@ fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za) /* retrieve the next entry at or after zc_hash/zc_cd */ /* if no entry, return ENOENT */ + /* + * If we are reading from the beginning, we're almost certain to + * iterate over the entire ZAP object. If there are multiple leaf + * blocks (freeblk > 2), prefetch the whole object (up to + * dmu_prefetch_max bytes), so that we read the leaf blocks + * concurrently. (Unless noprefetch was requested via + * zap_cursor_init_noprefetch()). + */ + if (zc->zc_hash == 0 && zap_iterate_prefetch && + zc->zc_prefetch && zap_f_phys(zap)->zap_freeblk > 2) { + dmu_prefetch(zc->zc_objset, zc->zc_zapobj, 0, 0, + zap_f_phys(zap)->zap_freeblk << FZAP_BLOCK_SHIFT(zap), + ZIO_PRIORITY_ASYNC_READ); + } + if (zc->zc_leaf && (ZAP_HASH_IDX(zc->zc_hash, zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix_len) != @@ -1342,3 +1387,12 @@ fzap_get_stats(zap_t *zap, zap_stats_t *zs) } } } + +#if defined(_KERNEL) +/* BEGIN CSTYLED */ +module_param(zap_iterate_prefetch, int, 0644); +MODULE_PARM_DESC(zap_iterate_prefetch, + "When iterating ZAP object, prefetch it"); + +/* END CSTYLED */ +#endif diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index fa369f797548..467812ff637c 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2017 by Delphix. All rights reserved. + * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2017 Nexenta Systems, Inc. */ @@ -1472,9 +1472,9 @@ zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, * Routines for iterating over the attributes. */ -void -zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, - uint64_t serialized) +static void +zap_cursor_init_impl(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, + uint64_t serialized, boolean_t prefetch) { zc->zc_objset = os; zc->zc_zap = NULL; @@ -1483,12 +1483,33 @@ zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, zc->zc_serialized = serialized; zc->zc_hash = 0; zc->zc_cd = 0; + zc->zc_prefetch = prefetch; +} +void +zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, + uint64_t serialized) +{ + zap_cursor_init_impl(zc, os, zapobj, serialized, B_TRUE); } +/* + * Initialize a cursor at the beginning of the ZAP object. The entire + * ZAP object will be prefetched. + */ void zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) { - zap_cursor_init_serialized(zc, os, zapobj, 0); + zap_cursor_init_impl(zc, os, zapobj, 0, B_TRUE); +} + +/* + * Initialize a cursor at the beginning, but request that we not prefetch + * the entire ZAP object. + */ +void +zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) +{ + zap_cursor_init_impl(zc, os, zapobj, 0, B_FALSE); } void diff --git a/module/zfs/zcp_get.c b/module/zfs/zcp_get.c index ed98f0d1025b..0a5f0b8242ab 100644 --- a/module/zfs/zcp_get.c +++ b/module/zfs/zcp_get.c @@ -423,13 +423,11 @@ get_special_prop(lua_State *state, dsl_dataset_t *ds, const char *dsname, case ZFS_PROP_RECEIVE_RESUME_TOKEN: { char *token = get_receive_resume_stats_impl(ds); - VERIFY3U(strlcpy(strval, token, ZAP_MAXVALUELEN), - <, ZAP_MAXVALUELEN); + (void) strlcpy(strval, token, ZAP_MAXVALUELEN); if (strcmp(strval, "") == 0) { char *childval = get_child_receive_stats(ds); - VERIFY3U(strlcpy(strval, childval, ZAP_MAXVALUELEN), - <, ZAP_MAXVALUELEN); + (void) strlcpy(strval, childval, ZAP_MAXVALUELEN); if (strcmp(strval, "") == 0) error = ENOENT; diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index 46e6e19b91d5..8acbbb61ca9d 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -30,6 +30,7 @@ * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. * Copyright (c) 2018 George Melikov. All Rights Reserved. + * Copyright (c) 2019 Datto, Inc. All rights reserved. */ /* @@ -85,6 +86,7 @@ #include #include #include +#include #include "zfs_namecheck.h" /* @@ -190,7 +192,7 @@ static void zfsctl_snapshot_add(zfs_snapentry_t *se) { ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); - zfs_refcount_add(&se->se_refcount, NULL); + zfsctl_snapshot_hold(se); avl_add(&zfs_snapshots_by_name, se); avl_add(&zfs_snapshots_by_objsetid, se); } @@ -267,7 +269,7 @@ zfsctl_snapshot_find_by_name(char *snapname) search.se_name = snapname; se = avl_find(&zfs_snapshots_by_name, &search, NULL); if (se) - zfs_refcount_add(&se->se_refcount, NULL); + zfsctl_snapshot_hold(se); return (se); } @@ -288,7 +290,7 @@ zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid) search.se_objsetid = objsetid; se = avl_find(&zfs_snapshots_by_objsetid, &search, NULL); if (se) - zfs_refcount_add(&se->se_refcount, NULL); + zfsctl_snapshot_hold(se); return (se); } @@ -702,37 +704,6 @@ zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len, return (0); } -/* - * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/" - */ -static int -zfsctl_snapshot_path(struct path *path, int len, char *full_path) -{ - char *path_buffer, *path_ptr; - int path_len, error = 0; - - path_buffer = kmem_alloc(len, KM_SLEEP); - - path_ptr = d_path(path, path_buffer, len); - if (IS_ERR(path_ptr)) { - error = -PTR_ERR(path_ptr); - goto out; - } - - path_len = path_buffer + len - 1 - path_ptr; - if (path_len > len) { - error = SET_ERROR(EFAULT); - goto out; - } - - memcpy(full_path, path_ptr, path_len); - full_path[path_len] = '\0'; -out: - kmem_free(path_buffer, len); - - return (error); -} - /* * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/" */ @@ -1047,8 +1018,6 @@ zfsctl_snapshot_unmount(char *snapname, int flags) return (error); } -#define MOUNT_BUSY 0x80 /* Mount failed due to EBUSY (from mntent.h) */ - int zfsctl_snapshot_mount(struct path *path, int flags) { @@ -1078,9 +1047,13 @@ zfsctl_snapshot_mount(struct path *path, int flags) if (error) goto error; - error = zfsctl_snapshot_path(path, MAXPATHLEN, full_path); - if (error) - goto error; + /* + * Construct a mount point path from sb of the ctldir inode and dirent + * name, instead of from d_path(), so that chroot'd process doesn't fail + * on mount.zfs(8). + */ + snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s", + zfsvfs->z_vfs->vfs_mntpoint, dname(dentry)); /* * Multiple concurrent automounts of a snapshot are never allowed. @@ -1109,8 +1082,8 @@ zfsctl_snapshot_mount(struct path *path, int flags) error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); if (error) { if (!(error & MOUNT_BUSY << 8)) { - cmn_err(CE_WARN, "Unable to automount %s/%s: %d", - full_path, full_name, error); + zfs_dbgmsg("Unable to automount %s error=%d", + full_path, error); error = SET_ERROR(EISDIR); } else { /* diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index c43b7db3d4a7..08d405cb8546 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -2746,10 +2746,9 @@ zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, * Check that all the properties are valid user properties. */ static int -zfs_check_userprops(const char *fsname, nvlist_t *nvl) +zfs_check_userprops(nvlist_t *nvl) { nvpair_t *pair = NULL; - int error = 0; while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { const char *propname = nvpair_name(pair); @@ -2758,10 +2757,6 @@ zfs_check_userprops(const char *fsname, nvlist_t *nvl) nvpair_type(pair) != DATA_TYPE_STRING) return (SET_ERROR(EINVAL)); - if ((error = zfs_secpolicy_write_perms(fsname, - ZFS_DELEG_PERM_USERPROP, CRED()))) - return (error); - if (strlen(propname) >= ZAP_MAXNAMELEN) return (SET_ERROR(ENAMETOOLONG)); @@ -3475,19 +3470,18 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) nvpair_t *pair; (void) nvlist_lookup_nvlist(innvl, "props", &props); - if ((error = zfs_check_userprops(poolname, props)) != 0) - return (error); - if (!nvlist_empty(props) && zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS)) return (SET_ERROR(ENOTSUP)); + if ((error = zfs_check_userprops(props)) != 0) + return (error); snaps = fnvlist_lookup_nvlist(innvl, "snaps"); poollen = strlen(poolname); for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { const char *name = nvpair_name(pair); - const char *cp = strchr(name, '@'); + char *cp = strchr(name, '@'); /* * The snap name must contain an @, and the part after it must @@ -3504,6 +3498,18 @@ zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) (name[poollen] != '/' && name[poollen] != '@')) return (SET_ERROR(EXDEV)); + /* + * Check for permission to set the properties on the fs. + */ + if (!nvlist_empty(props)) { + *cp = '\0'; + error = zfs_secpolicy_write_perms(name, + ZFS_DELEG_PERM_USERPROP, CRED()); + *cp = '@'; + if (error != 0) + return (error); + } + /* This must be the only snap of this fs. */ for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair); pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) { @@ -7106,7 +7112,8 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); - error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag); + error = ddi_copyin((void *)(uintptr_t)arg, zc, sizeof (zfs_cmd_t), + flag); if (error != 0) { error = SET_ERROR(EFAULT); goto out; @@ -7265,7 +7272,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) out: nvlist_free(innvl); - rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag); + rc = ddi_copyout(zc, (void *)(uintptr_t)arg, sizeof (zfs_cmd_t), flag); if (error == 0 && rc != 0) error = SET_ERROR(EFAULT); if (error == 0 && vec->zvec_allow_log) { @@ -7376,13 +7383,6 @@ _init(void) { int error; - error = -vn_set_pwd("/"); - if (error) { - printk(KERN_NOTICE - "ZFS: Warning unable to set pwd to '/': %d\n", error); - return (error); - } - if ((error = -zvol_init()) != 0) return (error); diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 15c396ce0329..5966b7612b35 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -380,12 +380,14 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, zil_itx_assign(zilog, itx, tx); } +void zil_remove_async(zilog_t *zilog, uint64_t oid); + /* * Handles both TX_REMOVE and TX_RMDIR transactions. */ void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, - znode_t *dzp, char *name, uint64_t foid) + znode_t *dzp, char *name, uint64_t foid, boolean_t unlinked) { itx_t *itx; lr_remove_t *lr; @@ -401,6 +403,17 @@ zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, itx->itx_oid = foid; + /* + * Object ids can be re-instantiated in the next txg so + * remove any async transactions to avoid future leaks. + * This can happen if a fsync occurs on the re-instantiated + * object for a WR_INDIRECT or WR_NEED_COPY write, which gets + * the new file data and flushes a write record for the old object. + */ + if (unlinked) { + ASSERT((txtype & ~TX_CI) == TX_REMOVE); + zil_remove_async(zilog, foid); + } zil_itx_assign(zilog, itx, tx); } diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c index 144381769059..7dea85bb6614 100644 --- a/module/zfs/zfs_replay.c +++ b/module/zfs/zfs_replay.c @@ -337,8 +337,8 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap) xva.xva_vattr.va_nblocks = lr->lr_gen; xva.xva_vattr.va_fsid = dnodesize; - error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL); - if (error != ENOENT) + error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); + if (error) goto bail; if (lr->lr_common.lrc_txtype & TX_CI) @@ -473,8 +473,8 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) xva.xva_vattr.va_nblocks = lr->lr_gen; xva.xva_vattr.va_fsid = dnodesize; - error = dmu_object_info(zfsvfs->z_os, objid, NULL); - if (error != ENOENT) + error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); + if (error) goto out; if (lr->lr_common.lrc_txtype & TX_CI) diff --git a/module/zfs/zfs_sysfs.c b/module/zfs/zfs_sysfs.c index 30b5edb01e18..bb7f3b69a662 100644 --- a/module/zfs/zfs_sysfs.c +++ b/module/zfs/zfs_sysfs.c @@ -144,6 +144,10 @@ zfs_kobj_release(struct kobject *kobj) zkobj->zko_attr_count = 0; } +#ifndef sysfs_attr_init +#define sysfs_attr_init(attr) do {} while (0) +#endif + static void zfs_kobj_add_attr(zfs_mod_kobj_t *zkobj, int attr_num, const char *attr_name) { @@ -154,6 +158,7 @@ zfs_kobj_add_attr(zfs_mod_kobj_t *zkobj, int attr_num, const char *attr_name) zkobj->zko_attr_list[attr_num].name = attr_name; zkobj->zko_attr_list[attr_num].mode = 0444; zkobj->zko_default_attrs[attr_num] = &zkobj->zko_attr_list[attr_num]; + sysfs_attr_init(&zkobj->zko_attr_list[attr_num]); } static int @@ -259,6 +264,7 @@ zprop_sysfs_show(const char *attr_name, const zprop_desc_t *property, char *buf, size_t buflen) { const char *show_str; + char number[32]; /* For dataset properties list the dataset types that apply */ if (strcmp(attr_name, "datasets") == 0 && @@ -286,8 +292,6 @@ zprop_sysfs_show(const char *attr_name, const zprop_desc_t *property, } else if (strcmp(attr_name, "values") == 0) { show_str = property->pd_values ? property->pd_values : ""; } else if (strcmp(attr_name, "default") == 0) { - char number[32]; - switch (property->pd_proptype) { case PROP_TYPE_NUMBER: (void) snprintf(number, sizeof (number), "%llu", diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index d8e74cf138e5..e12a0a64843d 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -1751,7 +1751,12 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) * will fail with EIO since we have z_teardown_lock for writer (only * relevant for forced unmount). * - * Release all holds on dbufs. + * Release all holds on dbufs. We also grab an extra reference to all + * the remaining inodes so that the kernel does not attempt to free + * any inodes of a suspended fs. This can cause deadlocks since the + * zfs_resume_fs() process may involve starting threads, which might + * attempt to free unreferenced inodes to free up memory for the new + * thread. */ if (!unmounting) { mutex_enter(&zfsvfs->z_znodes_lock); @@ -1759,6 +1764,9 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) zp = list_next(&zfsvfs->z_all_znodes, zp)) { if (zp->z_sa_hdl) zfs_znode_dmu_fini(zp); + if (igrab(ZTOI(zp)) != NULL) + zp->z_suspended = B_TRUE; + } mutex_exit(&zfsvfs->z_znodes_lock); } @@ -2214,6 +2222,12 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) remove_inode_hash(ZTOI(zp)); zp->z_is_stale = B_TRUE; } + + /* see comment in zfs_suspend_fs() */ + if (zp->z_suspended) { + zfs_iput_async(ZTOI(zp)); + zp->z_suspended = B_FALSE; + } } mutex_exit(&zfsvfs->z_znodes_lock); diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 885d9633b01f..3c2278164289 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -775,7 +775,11 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) */ dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); + dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl); + DB_DNODE_ENTER(db); + dmu_tx_hold_write_by_dnode(tx, DB_DNODE(db), woff, + MIN(n, max_blksz)); + DB_DNODE_EXIT(db); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { @@ -1048,7 +1052,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) return (SET_ERROR(ENOENT)); } - zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); + zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); zgd->zgd_lwb = lwb; zgd->zgd_private = zp; @@ -1676,6 +1680,7 @@ zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl, * IN: dip - inode of directory to remove entry from. * name - name of entry to remove. * cr - credentials of caller. + * flags - case flags. * * RETURN: 0 if success * error code if failure @@ -1881,7 +1886,7 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags) txtype = TX_REMOVE; if (flags & FIGNORECASE) txtype |= TX_CI; - zfs_log_remove(zilog, tx, txtype, dzp, name, obj); + zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked); dmu_tx_commit(tx); out: @@ -1917,6 +1922,7 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags) * dirname - name of new directory. * vap - attributes of new directory. * cr - credentials of caller. + * flags - case flags. * vsecp - ACL to be set * * OUT: ipp - inode of created directory. @@ -2213,7 +2219,8 @@ zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr, uint64_t txtype = TX_RMDIR; if (flags & FIGNORECASE) txtype |= TX_CI; - zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); + zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT, + B_FALSE); } dmu_tx_commit(tx); @@ -2235,13 +2242,12 @@ zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr, } /* - * Read as many directory entries as will fit into the provided - * dirent buffer from the given directory cursor position. + * Read directory entries from the given directory cursor position and emit + * name and position for each entry. * * IN: ip - inode of directory to read. - * dirent - buffer for directory entries. - * - * OUT: dirent - filler buffer of directory entries. + * ctx - directory entry context. + * cr - credentials of caller. * * RETURN: 0 if success * error code if failure @@ -4006,13 +4012,14 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, * Insert the indicated symbolic reference entry into the directory. * * IN: dip - Directory to contain new symbolic link. - * link - Name for new symlink entry. + * name - Name of directory entry in dip. * vap - Attributes of new entry. - * target - Target path of new symlink. - * + * link - Name for new symlink entry. * cr - credentials of caller. * flags - case flags * + * OUT: ipp - Inode for new symbolic link. + * * RETURN: 0 on success, error code on failure. * * Timestamps: @@ -4216,6 +4223,7 @@ zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr) * sip - inode of new entry. * name - name of new entry. * cr - credentials of caller. + * flags - case flags. * * RETURN: 0 if success * error code if failure @@ -4729,7 +4737,6 @@ zfs_inactive(struct inode *ip) * IN: ip - inode seeking within * ooff - old file offset * noffp - pointer to new file offset - * ct - caller context * * RETURN: 0 if success * EINVAL if new offset invalid @@ -5072,13 +5079,14 @@ zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr) #ifdef HAVE_UIO_ZEROCOPY /* - * Tunable, both must be a power of 2. - * - * zcr_blksz_min: the smallest read we may consider to loan out an arcbuf - * zcr_blksz_max: if set to less than the file block size, allow loaning out of - * an arcbuf for a partial block read + * The smallest read we may consider to loan out an arcbuf. + * This must be a power of 2. */ int zcr_blksz_min = (1 << 10); /* 1K */ +/* + * If set to less than the file block size, allow loaning out of an + * arcbuf for a partial block read. This must be a power of 2. + */ int zcr_blksz_max = (1 << 17); /* 128K */ /*ARGSUSED*/ @@ -5257,9 +5265,11 @@ EXPORT_SYMBOL(zfs_putpage); EXPORT_SYMBOL(zfs_dirty_inode); EXPORT_SYMBOL(zfs_map); -/* CSTYLED */ +/* BEGIN CSTYLED */ module_param(zfs_delete_blocks, ulong, 0644); MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async"); -module_param(zfs_read_chunk_size, long, 0644); +module_param(zfs_read_chunk_size, ulong, 0644); MODULE_PARM_DESC(zfs_read_chunk_size, "Bytes to read per chunk"); +/* END CSTYLED */ + #endif diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index d5ed4af7029d..91162e857d44 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -515,7 +515,7 @@ zfs_inode_update(znode_t *zp) */ static znode_t * zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, - dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl) + dmu_object_type_t obj_type, sa_handle_t *hdl) { znode_t *zp; struct inode *ip; @@ -540,6 +540,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, ASSERT3P(zp->z_acl_cached, ==, NULL); ASSERT3P(zp->z_xattr_cached, ==, NULL); zp->z_moved = 0; + zp->z_suspended = B_FALSE; zp->z_sa_hdl = NULL; zp->z_unlinked = 0; zp->z_atime_dirty = 0; @@ -596,7 +597,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, ZFS_TIME_DECODE(&ip->i_mtime, mtime); ZFS_TIME_DECODE(&ip->i_ctime, ctime); - ip->i_ino = obj; + ip->i_ino = zp->z_id; zfs_inode_update(zp); zfs_inode_set_ops(zfsvfs, ip); @@ -651,12 +652,11 @@ static zfs_acl_phys_t acl_phys; * cr - credentials of caller * flag - flags: * IS_ROOT_NODE - new object will be root + * IS_TMPFILE - new object is of O_TMPFILE * IS_XATTR - new object is an attribute - * bonuslen - length of bonus buffer - * setaclp - File/Dir initial ACL - * fuidp - Tracks fuid allocation. + * acl_ids - ACL related attributes * - * OUT: zpp - allocated znode + * OUT: zpp - allocated znode (set to dzp if IS_ROOT_NODE) * */ void @@ -911,8 +911,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, * not fail retry until sufficient memory has been reclaimed. */ do { - *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, obj, - sa_hdl); + *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl); } while (*zpp == NULL); VERIFY(*zpp != NULL); @@ -1135,7 +1134,7 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) * bonus buffer. */ zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size, - doi.doi_bonus_type, obj_num, NULL); + doi.doi_bonus_type, NULL); if (zp == NULL) { err = SET_ERROR(ENOENT); } else { diff --git a/module/zfs/zil.c b/module/zfs/zil.c index e3af70d7fe37..f77de8d14c00 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -1836,7 +1836,7 @@ zil_aitx_compare(const void *x1, const void *x2) /* * Remove all async itx with the given oid. */ -static void +void zil_remove_async(zilog_t *zilog, uint64_t oid) { uint64_t otxg, txg; @@ -1888,16 +1888,6 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) itxg_t *itxg; itxs_t *itxs, *clean = NULL; - /* - * Object ids can be re-instantiated in the next txg so - * remove any async transactions to avoid future leaks. - * This can happen if a fsync occurs on the re-instantiated - * object for a WR_INDIRECT or WR_NEED_COPY write, which gets - * the new file data and flushes a write record for the old object. - */ - if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_REMOVE) - zil_remove_async(zilog, itx->itx_oid); - /* * Ensure the data of a renamed file is committed before the rename. */ diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 36814c11f29d..035b3a499d5a 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2017, Intel Corporation. */ @@ -96,9 +96,23 @@ int zio_slow_io_ms = (30 * MILLISEC); * * The 'zfs_sync_pass_deferred_free' pass must be greater than 1 to ensure that * regular blocks are not deferred. + * + * Starting in sync pass 8 (zfs_sync_pass_dont_compress), we disable + * compression (including of metadata). In practice, we don't have this + * many sync passes, so this has no effect. + * + * The original intent was that disabling compression would help the sync + * passes to converge. However, in practice disabling compression increases + * the average number of sync passes, because when we turn compression off, a + * lot of block's size will change and thus we have to re-allocate (not + * overwrite) them. It also increases the number of 128KB allocations (e.g. + * for indirect blocks and spacemaps) because these will not be compressed. + * The 128K allocations are especially detrimental to performance on highly + * fragmented systems, which may have very few free segments of this size, + * and may need to load new metaslabs to satisfy 128K allocations. */ int zfs_sync_pass_deferred_free = 2; /* defer frees starting in this pass */ -int zfs_sync_pass_dont_compress = 5; /* don't compress starting in this pass */ +int zfs_sync_pass_dont_compress = 8; /* don't compress starting in this pass */ int zfs_sync_pass_rewrite = 2; /* rewrite new bps starting in this pass */ /* @@ -2908,6 +2922,20 @@ zio_nop_write(zio_t *zio) ASSERT(bcmp(&bp->blk_prop, &bp_orig->blk_prop, sizeof (uint64_t)) == 0); + /* + * If we're overwriting a block that is currently on an + * indirect vdev, then ignore the nopwrite request and + * allow a new block to be allocated on a concrete vdev. + */ + spa_config_enter(zio->io_spa, SCL_VDEV, FTAG, RW_READER); + vdev_t *tvd = vdev_lookup_top(zio->io_spa, + DVA_GET_VDEV(&bp->blk_dva[0])); + if (tvd->vdev_ops == &vdev_indirect_ops) { + spa_config_exit(zio->io_spa, SCL_VDEV, FTAG); + return (zio); + } + spa_config_exit(zio->io_spa, SCL_VDEV, FTAG); + *bp = *bp_orig; zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; zio->io_flags |= ZIO_FLAG_NOPWRITE; @@ -3238,7 +3266,9 @@ zio_ddt_write(zio_t *zio) BP_ZERO(bp); } else { zp->zp_dedup = B_FALSE; + BP_SET_DEDUP(bp, B_FALSE); } + ASSERT(!BP_GET_DEDUP(bp)); zio->io_pipeline = ZIO_WRITE_PIPELINE; ddt_exit(ddt); return (zio); diff --git a/module/zfs/zpl_xattr.c b/module/zfs/zpl_xattr.c index 8ee6e9a97f0a..95523f28e3b4 100644 --- a/module/zfs/zpl_xattr.c +++ b/module/zfs/zpl_xattr.c @@ -1130,12 +1130,9 @@ zpl_init_acl(struct inode *ip, struct inode *dir) return (0); if (!S_ISLNK(ip->i_mode)) { - if (ITOZSB(ip)->z_acl_type == ZFS_ACLTYPE_POSIXACL) { - acl = zpl_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) - return (PTR_ERR(acl)); - } - + acl = zpl_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return (PTR_ERR(acl)); if (!acl) { ip->i_mode &= ~current_umask(); ip->i_ctime = current_time(ip); @@ -1144,7 +1141,7 @@ zpl_init_acl(struct inode *ip, struct inode *dir) } } - if ((ITOZSB(ip)->z_acl_type == ZFS_ACLTYPE_POSIXACL) && acl) { + if (acl) { umode_t mode; if (S_ISDIR(ip->i_mode)) { diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index c29f65f676b9..7c7500dbaaf7 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -1876,6 +1876,10 @@ zvol_create_minor_impl(const char *name) #ifdef QUEUE_FLAG_ADD_RANDOM blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zv->zv_queue); #endif + /* This flag was introduced in kernel version 4.12. */ +#ifdef QUEUE_FLAG_SCSI_PASSTHROUGH + blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, zv->zv_queue); +#endif if (spa_writeable(dmu_objset_spa(os))) { if (zil_replay_disable) diff --git a/rpm/generic/zfs-dkms.spec.in b/rpm/generic/zfs-dkms.spec.in index 568bef988ca0..d87293686422 100644 --- a/rpm/generic/zfs-dkms.spec.in +++ b/rpm/generic/zfs-dkms.spec.in @@ -73,7 +73,7 @@ exit 1 %preun # Are we doing an upgrade? -if [ $1 -ne 0 ] ; then +if [ "$1" = "1" -o "$1" = "upgrade" ] ; then # Yes we are. Are we upgrading to a new ZFS version? NEWEST_VER=$(dkms status zfs | sed 's/,//g' | sort -r -V | awk '/installed/{print $2; exit}') if [ "$NEWEST_VER" != "%{version}" ] ; then diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in index 9faa3ba771a1..b9ca5ed5fb74 100644 --- a/rpm/generic/zfs.spec.in +++ b/rpm/generic/zfs.spec.in @@ -99,6 +99,7 @@ %define __python_cffi_pkg python%{__python_pkg_version}-cffi %define __python_setuptools_pkg python%{__python_pkg_version}-setuptools %endif +%define __python_sitelib %(%{__python} -Esc "from distutils.sysconfig import get_python_lib; print(get_python_lib())") # By default python-pyzfs is enabled, with the exception of # RHEL 6 which by default uses Python 2.6 which is too old. @@ -138,7 +139,7 @@ BuildRequires: libblkid-devel BuildRequires: libudev-devel BuildRequires: libattr-devel BuildRequires: openssl-devel -%if 0%{?fedora} >= 28 +%if 0%{?fedora} >= 28 || 0%{?rhel} >= 8 || 0%{?centos} >= 8 BuildRequires: libtirpc-devel %endif Requires: openssl @@ -255,7 +256,8 @@ validating the file system. %package dracut Summary: Dracut module Group: System Environment/Kernel -Requires: %{name}%{?_isa} = %{version}-%{release} +BuildArch: noarch +Requires: %{name} >= %{version} Requires: dracut Requires: /usr/bin/awk Requires: grep @@ -320,7 +322,7 @@ image which is ZFS aware. %if 0%{?_systemd} %define systemd --enable-systemd --with-systemdunitdir=%{_unitdir} --with-systemdpresetdir=%{_presetdir} --with-systemdmodulesloaddir=%{_modulesloaddir} --with-systemdgeneratordir=%{_systemdgeneratordir} --disable-sysvinit - %define systemd_svcs zfs-import-cache.service zfs-import-scan.service zfs-mount.service zfs-share.service zfs-zed.service zfs.target zfs-import.target + %define systemd_svcs zfs-import-cache.service zfs-import-scan.service zfs-mount.service zfs-share.service zfs-zed.service zfs.target zfs-import.target zfs-volume-wait.service zfs-volumes.target %else %define systemd --enable-sysvinit --disable-systemd %endif @@ -417,6 +419,7 @@ systemctl --system daemon-reload >/dev/null || true %{_sbindir}/* %{_bindir}/raidz_test %{_bindir}/zgenhostid +%{_bindir}/zvol_wait # Optional Python 2/3 scripts %{_bindir}/arc_summary %{_bindir}/arcstat @@ -473,8 +476,8 @@ systemctl --system daemon-reload >/dev/null || true %doc contrib/pyzfs/README %doc contrib/pyzfs/LICENSE %defattr(-,root,root,-) -%{python_sitelib}/libzfs_core/* -%{python_sitelib}/pyzfs* +%{__python_sitelib}/libzfs_core/* +%{__python_sitelib}/pyzfs* %endif %if 0%{?_initramfs} diff --git a/rpm/redhat/zfs-kmod.spec.in b/rpm/redhat/zfs-kmod.spec.in index 473f2d032509..f632c4867e63 100644 --- a/rpm/redhat/zfs-kmod.spec.in +++ b/rpm/redhat/zfs-kmod.spec.in @@ -41,6 +41,7 @@ This package contains the ZFS kernel modules. %package -n kmod-%{kmod_name}-devel Summary: ZFS kernel module(s) devel common Group: System Environment/Kernel +Provides: kmod-spl-devel = %{version} %description -n kmod-%{kmod_name}-devel This package provides the header files and objects to build kernel modules. diff --git a/scripts/Makefile.am b/scripts/Makefile.am index 11e963c527a8..d275a41c4e04 100644 --- a/scripts/Makefile.am +++ b/scripts/Makefile.am @@ -60,7 +60,7 @@ all-local: -e '\|^export SBIN_DIR=|s|$$|@abs_top_builddir@/bin|' \ -e '\|^export ZTS_DIR=|s|$$|@abs_top_srcdir@/tests|' \ -e '\|^export SCRIPT_DIR=|s|$$|@abs_top_srcdir@/scripts|' \ - common.sh.in >common.sh + $(abs_top_srcdir)/scripts/common.sh.in >common.sh -echo "$$EXTRA_ENVIRONMENT" >>common.sh clean-local: @@ -71,4 +71,5 @@ install-data-hook: -e '\|^export SBIN_DIR=|s|$$|@sbindir@|' \ -e '\|^export ZTS_DIR=|s|$$|@datadir@/@PACKAGE@|' \ -e '\|^export SCRIPT_DIR=|s|$$|@datadir@/@PACKAGE@|' \ - common.sh.in >$(DESTDIR)$(datadir)/@PACKAGE@/common.sh + $(abs_top_srcdir)/scripts/common.sh.in \ + >$(DESTDIR)$(datadir)/@PACKAGE@/common.sh diff --git a/scripts/kmodtool b/scripts/kmodtool index a632dd046b5a..9298d6d27dfa 100755 --- a/scripts/kmodtool +++ b/scripts/kmodtool @@ -144,7 +144,13 @@ print_rpmtemplate_per_kmodpkg () local kernel_uname_r=${1} local kernel_variant="${2:+-${2}}" - # first part + # Detect depmod install location + local depmod_path=/sbin/depmod + if [ ! -f ${depmod_path} ]; then + depmod_path=/usr/sbin/depmod + fi + + # first part cat <= %{?epoch:%{epoch}:}%{version} -Requires(post): ${prefix}/sbin/depmod -Requires(postun): ${prefix}/sbin/depmod + +%if 0%{?rhel} == 6 || 0%{?centos} == 6 +Requires(post): module-init-tools +Requires(postun): module-init-tools +%else +Requires(post): kmod +Requires(postun): kmod +%endif EOF if [[ ${obsolete_name} ]]; then @@ -170,17 +182,17 @@ BuildRequires: kernel-devel-uname-r = ${kernel_uname_r} %{?KmodsRequires:Requires: %{KmodsRequires}-uname-r = ${kernel_uname_r}} %{?KmodsRequires:BuildRequires: %{KmodsRequires}-uname-r = ${kernel_uname_r}} %post -n kmod-${kmodname}-${kernel_uname_r} -${prefix}/sbin/depmod -aeF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} > /dev/null || : +${prefix}${depmod_path} -aeF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} > /dev/null || : %postun -n kmod-${kmodname}-${kernel_uname_r} -${prefix}/sbin/depmod -aF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} &> /dev/null || : +${prefix}${depmod_path} -aF /boot/System.map-${kernel_uname_r} ${kernel_uname_r} &> /dev/null || : EOF else cat < /dev/null || : +[[ "\$(uname -r)" == "${kernel_uname_r}" ]] && ${prefix}${depmod_path} -a > /dev/null || : %postun -n kmod-${kmodname}-${kernel_uname_r} -[[ "\$(uname -r)" == "${kernel_uname_r}" ]] && ${prefix}/sbin/depmod -a > /dev/null || : +[[ "\$(uname -r)" == "${kernel_uname_r}" ]] && ${prefix}${depmod_path} -a > /dev/null || : EOF fi diff --git a/scripts/make_gitrev.sh b/scripts/make_gitrev.sh index bab9be88d734..1cf143794b26 100755 --- a/scripts/make_gitrev.sh +++ b/scripts/make_gitrev.sh @@ -39,3 +39,7 @@ trap cleanup EXIT git rev-parse --git-dir > /dev/null 2>&1 # Get the git current git revision ZFS_GIT_REV=$(git describe --always --long --dirty 2>/dev/null) +# Check if header file already contain the exact string +grep -sq "\"${ZFS_GIT_REV}\"" "$(dirname "$0")"/../include/zfs_gitrev.h && + trap - EXIT +exit 0 diff --git a/tests/runfiles/Makefile.am b/tests/runfiles/Makefile.am index 138d905a5722..4625806ff8ba 100644 --- a/tests/runfiles/Makefile.am +++ b/tests/runfiles/Makefile.am @@ -1,2 +1,5 @@ pkgdatadir = $(datadir)/@PACKAGE@/runfiles -dist_pkgdata_DATA = *.run +dist_pkgdata_DATA = \ + linux.run \ + longevity.run \ + perf-regression.run diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 22fc26212c0d..ff98661ec795 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -182,7 +182,8 @@ tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos', 'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg', 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_neg', 'zfs_mount_all_001_pos', 'zfs_mount_encrypted', 'zfs_mount_remount', - 'zfs_multi_mount', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints'] + 'zfs_multi_mount', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints', + 'zfs_mount_test_race'] tags = ['functional', 'cli_root', 'zfs_mount'] [tests/functional/cli_root/zfs_program] @@ -461,10 +462,7 @@ tests = ['zpool_split_cliargs', 'zpool_split_devices', tags = ['functional', 'cli_root', 'zpool_split'] [tests/functional/cli_root/zpool_status] -tests = ['zpool_status_001_pos', 'zpool_status_002_pos','zpool_status_003_pos', - 'zpool_status_-c_disable', 'zpool_status_-c_homedir', - 'zpool_status_-c_searchpath'] -user = +tests = ['zpool_status_001_pos', 'zpool_status_002_pos'] tags = ['functional', 'cli_root', 'zpool_status'] [tests/functional/cli_root/zpool_sync] @@ -528,6 +526,12 @@ tests = ['zpool_list_001_pos', 'zpool_list_002_neg'] user = tags = ['functional', 'cli_user', 'zpool_list'] +[tests/functional/cli_user/zpool_status] +tests = ['zpool_status_003_pos', 'zpool_status_-c_disable', + 'zpool_status_-c_homedir', 'zpool_status_-c_searchpath'] +user = +tags = ['functional', 'cli_user', 'zpool_status'] + [tests/functional/compression] tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos', 'compress_004_pos'] @@ -653,7 +657,7 @@ tags = ['functional', 'mmap'] tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval', 'mmp_active_import', 'mmp_inactive_import', 'mmp_exported_import', 'mmp_write_uberblocks', 'mmp_reset_interval', 'multihost_history', - 'mmp_on_zdb', 'mmp_write_distribution'] + 'mmp_on_zdb', 'mmp_write_distribution', 'mmp_hostid'] tags = ['functional', 'mmp'] [tests/functional/mount] @@ -758,7 +762,7 @@ tags = ['functional', 'refreserv'] pre = tests = ['removal_all_vdev', 'removal_check_space', 'removal_condense_export', 'removal_multiple_indirection', - 'removal_remap', 'removal_remap_deadlists', + 'removal_remap', 'removal_nopwrite', 'removal_remap_deadlists', 'removal_resume_export', 'removal_sanity', 'removal_with_add', 'removal_with_create_fs', 'removal_with_dedup', 'removal_with_errors', 'removal_with_export', @@ -820,8 +824,8 @@ tags = ['functional', 'scrub_mirror'] tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos', 'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg', 'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg', - 'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs', - 'slog_replay_volume'] + 'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs_001', + 'slog_replay_fs_002', 'slog_replay_volume'] tags = ['functional', 'slog'] [tests/functional/snapshot] @@ -843,6 +847,11 @@ tags = ['functional', 'snapused'] tests = ['sparse_001_pos'] tags = ['functional', 'sparse'] +[tests/functional/suid] +tests = ['suid_write_to_suid', 'suid_write_to_sgid', 'suid_write_to_suid_sgid', + 'suid_write_to_none'] +tags = ['functional', 'suid'] + [tests/functional/threadsappend] tests = ['threadsappend_001_pos'] tags = ['functional', 'threadsappend'] diff --git a/tests/test-runner/include/logapi.shlib b/tests/test-runner/include/logapi.shlib index 32fc00616180..cd7982a94a0b 100644 --- a/tests/test-runner/include/logapi.shlib +++ b/tests/test-runner/include/logapi.shlib @@ -198,12 +198,12 @@ function log_neg_expect elif (( $status == 127 )); then print -u2 $($out) _printerror "$@" "unexpectedly exited $status (File not found)" - # bus error - core dump - elif (( $status == 138 )); then + # bus error - core dump (256+signal, SIGBUS=7) + elif (( $status == 263 )); then print -u2 $($out) _printerror "$@" "unexpectedly exited $status (Bus Error)" - # segmentation violation - core dump - elif (( $status == 139 )); then + # segmentation violation - core dump (256+signal, SIGSEGV=11) + elif (( $status == 267 )); then print -u2 $($out) _printerror "$@" "unexpectedly exited $status (SEGV)" else diff --git a/tests/zfs-tests/include/blkdev.shlib b/tests/zfs-tests/include/blkdev.shlib index 9cac7184f9fc..ca8807e82c6a 100644 --- a/tests/zfs-tests/include/blkdev.shlib +++ b/tests/zfs-tests/include/blkdev.shlib @@ -18,6 +18,7 @@ # Copyright (c) 2017 Lawrence Livermore National Security, LLC. # Copyright (c) 2017 Datto Inc. # Copyright (c) 2017 Open-E, Inc. All Rights Reserved. +# Copyright 2019 Richard Elling # # @@ -55,12 +56,29 @@ function scan_scsi_hosts # # Wait for newly created block devices to have their minors created. +# Additional arguments can be passed to udevadm trigger, with the expected +# arguments to typically be a block device pathname. This is useful when +# checking waiting on a specific device to settle rather than triggering +# all devices and waiting for them all to settle. +# +# The udevadm settle timeout can be 120 or 180 seconds by default for +# some distros. If a long delay is experienced, it could be due to some +# strangeness in a malfunctioning device that isn't related to the devices +# under test. To help debug this condition, a notice is given if settle takes +# too long. +# +# Note: there is no meaningful return code if udevadm fails. Consumers +# should not expect a return code (do not call as argument to log_must) # function block_device_wait { if is_linux; then - udevadm trigger + udevadm trigger $* + typeset local start=$SECONDS udevadm settle + typeset local elapsed=$((SECONDS - start)) + [[ $elapsed > 60 ]] && \ + log_note udevadm settle time too long: $elapsed fi } diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 57d0880cc9bb..c7cb36a8d0ee 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -861,7 +861,8 @@ function zero_partitions # # best to retire this interface and replace it with something more flexible. # At the moment a best effort is made. # -function set_partition # +# arguments: +function set_partition { typeset -i slicenum=$1 typeset start=$2 @@ -872,6 +873,7 @@ function set_partition # /dev/null + parted $disk -s -- print 1 >/dev/null typeset ret_val=$? if [[ $slicenum -eq 0 || $ret_val -ne 0 ]]; then - parted $DEV_DSKDIR/$disk -s -- mklabel gpt + parted $disk -s -- mklabel gpt if [[ $? -ne 0 ]]; then log_note "Failed to create GPT partition table on $disk" return 1 @@ -899,20 +901,21 @@ function set_partition # /dev/null - block_device_wait + blockdev --rereadpt $disk 2>/dev/null + block_device_wait $disk else if [[ -z $slicenum || -z $size || -z $disk ]]; then log_fail "The slice, size or disk name is unspecified." @@ -932,10 +935,10 @@ function set_partition # > $format_file format -e -s -d $disk -f $format_file + typeset ret_val=$? + rm -f $format_file fi - typeset ret_val=$? - rm -f $format_file if [[ $ret_val -ne 0 ]]; then log_note "Unable to format $disk slice $slicenum to $size" return 1 @@ -3491,13 +3494,13 @@ function set_tunable_impl Linux) typeset zfs_tunables="/sys/module/$module/parameters" [[ -w "$zfs_tunables/$tunable" ]] || return 1 - echo -n "$value" > "$zfs_tunables/$tunable" - return "$?" + cat >"$zfs_tunables/$tunable" <<<"$value" + return $? ;; SunOS) [[ "$module" -eq "zfs" ]] || return 1 echo "${tunable}/${mdb_cmd}0t${value}" | mdb -kw - return "$?" + return $? ;; esac } @@ -3524,7 +3527,7 @@ function get_tunable_impl typeset zfs_tunables="/sys/module/$module/parameters" [[ -f "$zfs_tunables/$tunable" ]] || return 1 cat $zfs_tunables/$tunable - return "$?" + return $? ;; SunOS) [[ "$module" -eq "zfs" ]] || return 1 diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am index da27673ec946..ac0ba7cf3d1d 100644 --- a/tests/zfs-tests/tests/functional/Makefile.am +++ b/tests/zfs-tests/tests/functional/Makefile.am @@ -66,6 +66,7 @@ SUBDIRS = \ snapshot \ snapused \ sparse \ + suid \ threadsappend \ tmpfile \ trim \ diff --git a/tests/zfs-tests/tests/functional/checksum/Makefile.am b/tests/zfs-tests/tests/functional/checksum/Makefile.am index f72546b22590..905d991ed75f 100644 --- a/tests/zfs-tests/tests/functional/checksum/Makefile.am +++ b/tests/zfs-tests/tests/functional/checksum/Makefile.am @@ -1,7 +1,7 @@ include $(top_srcdir)/config/Rules.am AM_CPPFLAGS += -I$(top_srcdir)/include -LDADD = $(top_srcdir)/lib/libicp/libicp.la +LDADD = $(top_builddir)/lib/libicp/libicp.la AUTOMAKE_OPTIONS = subdir-objects diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh index a5f827b5642f..e69779bd4b4c 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_001_neg.ksh @@ -59,7 +59,7 @@ set -A args "create" "add" "destroy" "import fakepool" \ "-a" "-f" "-g" "-h" "-j" "-m" "-n" "-o" "-p" \ "-p /tmp" "-r" "-t" "-w" "-x" "-y" "-z" \ "-D" "-E" "-G" "-H" "-I" "-J" "-K" "-M" \ - "-N" "-Q" "-R" "-S" "-T" "-W" "-Y" "-Z" + "-N" "-Q" "-R" "-S" "-T" "-W" "-Z" log_assert "Execute zdb using invalid parameters." diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_009_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_009_neg.ksh index b8190626c7b3..63f5e595ea38 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_009_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_009_neg.ksh @@ -90,7 +90,9 @@ set -A args "$TESTPOOL/" "$TESTPOOL//blah" "$TESTPOOL/@blah" \ "$TESTPOOL/blah*blah" "$TESTPOOL/blah blah" \ "-s $TESTPOOL/$TESTFS1" "-b 1092 $TESTPOOL/$TESTFS1" \ "-b 64k $TESTPOOL/$TESTFS1" "-s -b 32k $TESTPOOL/$TESTFS1" \ - "$TESTPOOL/$BYND_MAX_NAME" "$TESTPOOL/$BYND_NEST_LIMIT" + "$TESTPOOL/$BYND_MAX_NAME" "$TESTPOOL/$BYND_NEST_LIMIT" \ + "$TESTPOOL/." "$TESTPOOL/.." "$TESTPOOL/../blah" "$TESTPOOL/./blah" \ + "$TESTPOOL/blah/./blah" "$TESTPOOL/blah/../blah" log_assert "Verify 'zfs create ' fails with bad argument." diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh index 9d5ecab0dfee..7e5072f0d5fd 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh @@ -51,10 +51,10 @@ # yes unspec 0 1 no no keyformat specified # yes unspec 1 0 yes new encryption root, crypt inherited # yes unspec 1 1 yes new encryption root, crypt inherited -# yes off 0 0 no unencrypted child of encrypted parent -# yes off 0 1 no unencrypted child of encrypted parent -# yes off 1 0 no unencrypted child of encrypted parent -# yes off 1 1 no unencrypted child of encrypted parent +# yes off 0 0 yes unencrypted child of encrypted parent +# yes off 0 1 no keylocation given, but crypt off +# yes off 1 0 no keyformat given, but crypt off +# yes off 1 1 no keyformat given, but crypt off # yes on 0 0 yes inherited encryption, local crypt # yes on 0 1 no no keyformat specified for new key # yes on 1 0 yes new encryption root @@ -113,7 +113,9 @@ log_must eval "echo $PASSPHRASE | zfs create -o keyformat=passphrase" \ log_must eval "echo $PASSPHRASE | zfs create -o keyformat=passphrase" \ "-o keylocation=prompt $TESTPOOL/$TESTFS2/c4" -log_mustnot zfs create -o encryption=off $TESTPOOL/$TESTFS2/c5 +log_must zfs create -o encryption=off $TESTPOOL/$TESTFS2/c5 +log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS2/c5)" == "off" + log_mustnot zfs create -o encryption=off -o keylocation=prompt \ $TESTPOOL/$TESTFS2/c5 log_mustnot zfs create -o encryption=off -o keyformat=passphrase \ @@ -122,13 +124,13 @@ log_mustnot zfs create -o encryption=off -o keyformat=passphrase \ -o keylocation=prompt $TESTPOOL/$TESTFS2/c5 log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ - "$TESTPOOL/$TESTFS2/c5" + "$TESTPOOL/$TESTFS2/c6" log_mustnot zfs create -o encryption=on -o keylocation=prompt \ - $TESTPOOL/$TESTFS2/c6 + $TESTPOOL/$TESTFS2/c7 log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ - "-o keyformat=passphrase $TESTPOOL/$TESTFS2/c6" + "-o keyformat=passphrase $TESTPOOL/$TESTFS2/c7" log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ - "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2/c7" + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2/c8" log_pass "ZFS creates datasets only if they have a valid combination of" \ "encryption properties set." diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am index b2de98934b74..c208a1c378dc 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am @@ -19,6 +19,7 @@ dist_pkgdata_SCRIPTS = \ zfs_mount_all_mountpoints.ksh \ zfs_mount_encrypted.ksh \ zfs_mount_remount.ksh \ + zfs_mount_test_race.sh \ zfs_multi_mount.ksh dist_pkgdata_DATA = \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.sh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.sh new file mode 100755 index 000000000000..404770b2727f --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_test_race.sh @@ -0,0 +1,116 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.cfg + +# +# DESCRIPTION: +# Verify parallel mount ordering is consistent. +# +# There was a bug in initial thread dispatching algorithm which put threads +# under race condition which resulted in undefined mount order. The purpose +# of this test is to verify `zfs unmount -a` succeeds (not `zfs mount -a` +# succeeds, it always does) after `zfs mount -a`, which could fail if threads +# race. See github.com/zfsonlinux/zfs/issues/{8450,8833,8878} for details. +# +# STRATEGY: +# 1. Create pools and filesystems. +# 2. Set same mount point for >1 datasets. +# 3. Unmount all datasets. +# 4. Mount all datasets. +# 5. Unmount all datasets (verify this succeeds). +# + +verify_runnable "both" + +TMPDIR=${TMPDIR:-$TEST_BASE_DIR} +MNTPT=$TMPDIR/zfs_mount_test_race_mntpt +DISK1="$TMPDIR/zfs_mount_test_race_disk1" +DISK2="$TMPDIR/zfs_mount_test_race_disk2" + +TESTPOOL1=zfs_mount_test_race_tp1 +TESTPOOL2=zfs_mount_test_race_tp2 + +export __ZFS_POOL_RESTRICT="$TESTPOOL1 $TESTPOOL2" +log_must zfs $unmountall +unset __ZFS_POOL_RESTRICT + +function cleanup +{ + zpool destroy $TESTPOOL1 + zpool destroy $TESTPOOL2 + rm -rf $MNTPT + rm -rf /$TESTPOOL1 + rm -rf /$TESTPOOL2 + rm -f $DISK1 + rm -f $DISK2 + export __ZFS_POOL_RESTRICT="$TESTPOOL1 $TESTPOOL2" + log_must zfs $mountall + unset __ZFS_POOL_RESTRICT +} +log_onexit cleanup + +log_note "Verify parallel mount ordering is consistent" + +log_must truncate -s $MINVDEVSIZE $DISK1 +log_must truncate -s $MINVDEVSIZE $DISK2 + +log_must zpool create -f $TESTPOOL1 $DISK1 +log_must zpool create -f $TESTPOOL2 $DISK2 + +log_must zfs create $TESTPOOL1/$TESTFS1 +log_must zfs create $TESTPOOL2/$TESTFS2 + +log_must zfs set mountpoint=none $TESTPOOL1 +log_must zfs set mountpoint=$MNTPT $TESTPOOL1/$TESTFS1 + +# Note that unmount can fail (due to race condition on `zfs mount -a`) with or +# without `canmount=off`. The race has nothing to do with canmount property, +# but turn it off for convenience of mount layout used in this test case. +log_must zfs set canmount=off $TESTPOOL2 +log_must zfs set mountpoint=$MNTPT $TESTPOOL2 + +# At this point, layout of datasets in two pools will look like below. +# Previously, on next `zfs mount -a`, pthreads assigned to TESTFS1 and TESTFS2 +# could race, and TESTFS2 usually (actually always) won in ZoL. Note that the +# problem is how two or more threads could initially be assigned to the same +# top level directory, not this specific layout. This layout is just an example +# that can reproduce race, and is also the layout reported in #8833. +# +# NAME MOUNTED MOUNTPOINT +# ---------------------------------------------- +# /$TESTPOOL1 no none +# /$TESTPOOL1/$TESTFS1 yes $MNTPT +# /$TESTPOOL2 no $MNTPT +# /$TESTPOOL2/$TESTFS2 yes $MNTPT/$TESTFS2 + +# Apparently two datasets must be mounted. +log_must ismounted $TESTPOOL1/$TESTFS1 +log_must ismounted $TESTPOOL2/$TESTFS2 +# This unmount always succeeds, because potential race hasn't happened yet. +log_must zfs unmount -a +# This mount always succeeds, whether threads are under race condition or not. +log_must zfs mount -a + +# Verify datasets are mounted (TESTFS2 fails if the race broke mount order). +log_must ismounted $TESTPOOL1/$TESTFS1 +log_must ismounted $TESTPOOL2/$TESTFS2 +# Verify unmount succeeds (fails if the race broke mount order). +log_must zfs unmount -a + +log_pass "Verify parallel mount ordering is consistent passed" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh index 57896c6fd305..f8e53f02c23d 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh @@ -46,7 +46,7 @@ function cleanup log_onexit cleanup -log_assert "ZFS should receive to an encrypted child dataset" +log_assert "ZFS should receive encrypted filesystems into child dataset" typeset passphrase="password" typeset snap="$TESTPOOL/$TESTFS@snap" @@ -60,11 +60,13 @@ log_must eval "echo $passphrase | zfs create -o encryption=on" \ log_note "Verifying ZFS will receive to an encrypted child" log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c1" -log_note "Verifying 'send -p' will not receive to an encrypted child" -log_mustnot eval "zfs send -p $snap | zfs receive $TESTPOOL/$TESTFS1/c2" +log_note "Verifying 'send -p' will receive to an encrypted child" +log_must eval "zfs send -p $snap | zfs receive $TESTPOOL/$TESTFS1/c2" +log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS1/c2)" == "off" -log_note "Verifying 'send -R' will not receive to an encrypted child" -log_mustnot eval "zfs send -R $snap | zfs receive $TESTPOOL/$TESTFS1/c3" +log_note "Verifying 'send -R' will receive to an encrypted child" +log_must eval "zfs send -R $snap | zfs receive $TESTPOOL/$TESTFS1/c3" +log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS1/c3)" == "off" log_note "Verifying ZFS will not receive to an encrypted child when the" \ "parent key is unloaded" @@ -72,4 +74,4 @@ log_must zfs unmount $TESTPOOL/$TESTFS1 log_must zfs unload-key $TESTPOOL/$TESTFS1 log_mustnot eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c4" -log_pass "ZFS can receive to an encrypted child dataset" +log_pass "ZFS can receive encrypted filesystems into child dataset" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh index 400592aaca2c..1b9c6e3c704f 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh @@ -23,12 +23,13 @@ # # DESCRIPTION: -# 'zfs rename' should not rename an unencrypted dataset to a child +# 'zfs rename' should be able to move an unencrypted dataset to a child # of an encrypted dataset # # STRATEGY: # 1. Create an encrypted dataset -# 2. Attempt to rename the default dataset to a child of the encrypted dataset +# 2. Rename the default dataset to a child of the encrypted dataset +# 3. Confirm the child dataset doesn't have any encryption properties # verify_runnable "both" @@ -36,16 +37,17 @@ verify_runnable "both" function cleanup { datasetexists $TESTPOOL/$TESTFS2 && \ - log_must zfs destroy $TESTPOOL/$TESTFS2 + log_must zfs destroy -r $TESTPOOL/$TESTFS2 } log_onexit cleanup -log_assert "'zfs rename' should not rename an unencrypted dataset to a" \ +log_assert "'zfs rename' should allow renaming an unencrypted dataset to a" \ "child of an encrypted dataset" log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2" -log_mustnot zfs rename $TESTPOOL/$TESTFS $TESTPOOL/$TESTFS2/$TESTFS +log_must zfs rename $TESTPOOL/$TESTFS $TESTPOOL/$TESTFS2/$TESTFS +log_must test "$(get_prop 'encryption' $TESTPOOL/$TESTFS2/$TESTFS)" == "off" -log_pass "'zfs rename' does not rename an unencrypted dataset to a child" \ +log_pass "'zfs rename' allows renaming an unencrypted dataset to a child" \ "of an encrypted dataset" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am index aab4de0e7c89..beb59e3d066b 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_status/Makefile.am @@ -3,8 +3,4 @@ dist_pkgdata_SCRIPTS = \ setup.ksh \ cleanup.ksh \ zpool_status_001_pos.ksh \ - zpool_status_002_pos.ksh \ - zpool_status_003_pos.ksh \ - zpool_status_-c_disable.ksh \ - zpool_status_-c_homedir.ksh \ - zpool_status_-c_searchpath.ksh + zpool_status_002_pos.ksh diff --git a/tests/zfs-tests/tests/functional/cli_user/Makefile.am b/tests/zfs-tests/tests/functional/cli_user/Makefile.am index f1ff32e8d22d..119f8ee187f6 100644 --- a/tests/zfs-tests/tests/functional/cli_user/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_user/Makefile.am @@ -2,4 +2,5 @@ SUBDIRS = \ misc \ zfs_list \ zpool_iostat \ - zpool_list + zpool_list \ + zpool_status diff --git a/tests/zfs-tests/tests/functional/cli_user/zpool_status/Makefile.am b/tests/zfs-tests/tests/functional/cli_user/zpool_status/Makefile.am new file mode 100644 index 000000000000..e1b339657749 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_user/zpool_status/Makefile.am @@ -0,0 +1,8 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_user/zpool_status +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + zpool_status_003_pos.ksh \ + zpool_status_-c_disable.ksh \ + zpool_status_-c_homedir.ksh \ + zpool_status_-c_searchpath.ksh diff --git a/tests/zfs-tests/tests/functional/cli_user/zpool_status/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/cleanup.ksh new file mode 100755 index 000000000000..79cd6e9f908e --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_user/zpool_status/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/cli_user/zpool_status/setup.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/setup.ksh new file mode 100755 index 000000000000..6a9af3bc28c3 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_user/zpool_status/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} + +default_setup $DISK diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_disable.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_disable.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_disable.ksh rename to tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_disable.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_homedir.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_homedir.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_homedir.ksh rename to tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_homedir.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_searchpath.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_searchpath.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_-c_searchpath.ksh rename to tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_-c_searchpath.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_003_pos.ksh similarity index 100% rename from tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh rename to tests/zfs-tests/tests/functional/cli_user/zpool_status/zpool_status_003_pos.ksh diff --git a/tests/zfs-tests/tests/functional/hkdf/Makefile.am b/tests/zfs-tests/tests/functional/hkdf/Makefile.am index 3ac26ed21c16..b54e353cd963 100644 --- a/tests/zfs-tests/tests/functional/hkdf/Makefile.am +++ b/tests/zfs-tests/tests/functional/hkdf/Makefile.am @@ -2,7 +2,7 @@ include $(top_srcdir)/config/Rules.am AM_CPPFLAGS += -I$(top_srcdir)/include AM_CPPFLAGS += -I$(top_srcdir)/lib/libspl/include -LDADD = $(top_srcdir)/lib/libzpool/libzpool.la +LDADD = $(top_builddir)/lib/libzpool/libzpool.la AUTOMAKE_OPTIONS = subdir-objects diff --git a/tests/zfs-tests/tests/functional/mmp/Makefile.am b/tests/zfs-tests/tests/functional/mmp/Makefile.am index e39a0a5aac8e..2848fd4ce692 100644 --- a/tests/zfs-tests/tests/functional/mmp/Makefile.am +++ b/tests/zfs-tests/tests/functional/mmp/Makefile.am @@ -12,6 +12,7 @@ dist_pkgdata_SCRIPTS = \ mmp_reset_interval.ksh \ mmp_on_zdb.ksh \ mmp_write_distribution.ksh \ + mmp_hostid.ksh \ setup.ksh \ cleanup.ksh diff --git a/tests/zfs-tests/tests/functional/mmp/mmp_hostid.ksh b/tests/zfs-tests/tests/functional/mmp/mmp_hostid.ksh new file mode 100755 index 000000000000..b492b1070caf --- /dev/null +++ b/tests/zfs-tests/tests/functional/mmp/mmp_hostid.ksh @@ -0,0 +1,90 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 by Lawrence Livermore National Security, LLC. +# + +# DESCRIPTION: +# Verify the hostid file can reside on a ZFS dataset. +# +# STRATEGY: +# 1. Create a non-redundant pool +# 2. Create an 'etc' dataset containing a valid hostid file +# 3. Create a file so the pool will have some contents +# 4. Verify multihost cannot be enabled until the /etc/hostid is linked +# 5. Verify vdevs may be attached and detached +# 6. Verify normal, cache, log and special vdevs can be added +# 7. Verify normal, cache, and log vdevs can be removed +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/mmp/mmp.cfg +. $STF_SUITE/tests/functional/mmp/mmp.kshlib + +verify_runnable "both" + +function cleanup +{ + default_cleanup_noexit + log_must rm $MMP_DIR/file.{0,1,2,3,4,5} + log_must rmdir $MMP_DIR + log_must mmp_clear_hostid +} + +log_assert "Verify hostid file can reside on a ZFS dataset" +log_onexit cleanup + +log_must mkdir -p $MMP_DIR +log_must truncate -s $MINVDEVSIZE $MMP_DIR/file.{0,1,2,3,4,5} + +# 1. Create a non-redundant pool +log_must zpool create $MMP_POOL $MMP_DIR/file.0 + +# 2. Create an 'etc' dataset containing a valid hostid file; caching is +# disabled on the dataset to force the hostid to be read from disk. +log_must zfs create -o primarycache=none -o secondarycache=none $MMP_POOL/etc +mntpnt_etc=$(get_prop mountpoint $MMP_POOL/etc) +log_must mmp_set_hostid $HOSTID1 +log_must mv $HOSTID_FILE $mntpnt_etc/hostid + +# 3. Create a file so the pool will have some contents +log_must zfs create $MMP_POOL/fs +mntpnt_fs=$(get_prop mountpoint $MMP_POOL/fs) +log_must mkfile 1M $fs_mntpnt/file + +# 4. Verify multihost cannot be enabled until the /etc/hostid is linked +log_mustnot zpool set multihost=on $MMP_POOL +log_must ln -s $mntpnt_etc/hostid $HOSTID_FILE +log_must zpool set multihost=on $MMP_POOL + +# 5. Verify vdevs may be attached and detached +log_must zpool attach $MMP_POOL $MMP_DIR/file.0 $MMP_DIR/file.1 +log_must zpool detach $MMP_POOL $MMP_DIR/file.1 + +# 6. Verify normal, cache, log and special vdevs can be added +log_must zpool add $MMP_POOL $MMP_DIR/file.1 +log_must zpool add $MMP_POOL $MMP_DIR/file.2 +log_must zpool add $MMP_POOL cache $MMP_DIR/file.3 +log_must zpool add $MMP_POOL log $MMP_DIR/file.4 +log_must zpool add $MMP_POOL special $MMP_DIR/file.5 + +# 7. Verify normal, cache, and log vdevs can be removed +log_must zpool remove $MMP_POOL $MMP_DIR/file.2 +log_must zpool remove $MMP_POOL $MMP_DIR/file.3 +log_must zpool remove $MMP_POOL $MMP_DIR/file.4 + +log_pass "Verify hostid file can reside on a ZFS dataset." diff --git a/tests/zfs-tests/tests/functional/removal/Makefile.am b/tests/zfs-tests/tests/functional/removal/Makefile.am index ba42b899acac..df92e0b5ed44 100644 --- a/tests/zfs-tests/tests/functional/removal/Makefile.am +++ b/tests/zfs-tests/tests/functional/removal/Makefile.am @@ -18,7 +18,7 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/removal dist_pkgdata_SCRIPTS = \ cleanup.ksh removal_all_vdev.ksh removal_check_space.ksh \ removal_condense_export.ksh removal_multiple_indirection.ksh \ - removal_remap_deadlists.ksh removal_remap.ksh \ + removal_remap_deadlists.ksh removal_nopwrite.ksh removal_remap.ksh \ removal_reservation.ksh removal_resume_export.ksh \ removal_sanity.ksh removal_with_add.ksh removal_with_create_fs.ksh \ removal_with_dedup.ksh removal_with_errors.ksh \ diff --git a/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh b/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh new file mode 100755 index 000000000000..cb8bd6b810c1 --- /dev/null +++ b/tests/zfs-tests/tests/functional/removal/removal_nopwrite.ksh @@ -0,0 +1,87 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/removal/removal.kshlib +. $STF_SUITE/tests/functional/nopwrite/nopwrite.shlib + +default_setup_noexit "$DISKS" +log_onexit default_cleanup_noexit +BLOCKSIZE=8192 + +origin="$TESTPOOL/$TESTFS" + +log_must zfs set compress=on $origin +log_must zfs set checksum=edonr $origin + +log_must zfs set recordsize=8k $origin +dd if=/dev/urandom of=$TESTDIR/file_8k bs=1024k count=$MEGS oflag=sync \ + conv=notrunc >/dev/null 2>&1 || log_fail "dd into $TESTDIR/file failed." +log_must zfs set recordsize=128k $origin +dd if=/dev/urandom of=$TESTDIR/file_128k bs=1024k count=$MEGS oflag=sync \ + conv=notrunc >/dev/null 2>&1 || log_fail "dd into $TESTDIR/file failed." + +zfs snapshot $origin@a || log_fail "zfs snap failed" +log_must zfs clone $origin@a $origin/clone + +# +# Verify that nopwrites work prior to removal +# +log_must zfs set recordsize=8k $origin/clone +dd if=/$TESTDIR/file_8k of=/$TESTDIR/clone/file_8k bs=1024k \ + oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed." +log_must verify_nopwrite $origin $origin@a $origin/clone + +log_must zfs set recordsize=128k $origin/clone +dd if=/$TESTDIR/file_128k of=/$TESTDIR/clone/file_128k bs=1024k \ + oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed." +log_must verify_nopwrite $origin $origin@a $origin/clone + +# +# Remove a device before testing nopwrites again +# +log_must zpool remove $TESTPOOL $REMOVEDISK +log_must wait_for_removal $TESTPOOL +log_mustnot vdevs_in_pool $TESTPOOL $REMOVEDISK + +# +# Normally, we expect nopwrites to avoid allocating new blocks, but +# after a device has been removed the DVAs will get remapped when +# a L0's indirect bloock is written. This will negate the effects +# of nopwrite and should result in new allocations. +# + +# +# Perform a direct zil nopwrite test +# +log_must zfs set recordsize=8k $origin/clone +dd if=/$TESTDIR/file_8k of=/$TESTDIR/clone/file_8k bs=1024k \ + oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed." +log_mustnot verify_nopwrite $origin $origin@a $origin/clone + +# +# Perform an indirect zil nopwrite test +# +log_must zfs set recordsize=128k $origin/clone +dd if=/$TESTDIR/file_128k of=/$TESTDIR/clone/file_128k bs=1024k \ + oflag=sync conv=notrunc >/dev/null 2>&1 || log_fail "dd failed." +log_mustnot verify_nopwrite $origin $origin@a $origin/clone + +log_pass "Remove works with nopwrite." diff --git a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib index 521a1c7eb63c..8737ae55abfa 100644 --- a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib +++ b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib @@ -754,7 +754,7 @@ function verify_stream_size datasetexists $ds || log_fail "No such dataset: $ds" typeset stream_size=$(cat $stream | zstreamdump | sed -n \ - 's/ Total write size = \(.*\) (0x.*)/\1/p') + 's/ Total payload size = \(.*\) (0x.*)/\1/p') typeset inc_size=0 if [[ -n $inc_src ]]; then diff --git a/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh b/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh index 49b846e9c332..443887bfa238 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh @@ -62,7 +62,7 @@ log_must eval "echo 'password' > $keyfile" log_must zfs create -o dedup=on -o encryption=on -o keyformat=passphrase \ -o keylocation=file://$keyfile -V 128M $TESTPOOL/$TESTVOL -log_must block_device_wait +block_device_wait log_must eval "echo 'y' | newfs -t ext4 -v $zdev" log_must mkdir -p $mntpnt @@ -82,7 +82,7 @@ done log_must eval "zfs send -wDR $TESTPOOL/$TESTVOL@snap$snap_count > $sendfile" log_must eval "zfs recv $TESTPOOL/recv < $sendfile" log_must zfs load-key $TESTPOOL/recv -log_must block_device_wait +block_device_wait log_must mount $recvdev $recvmnt diff --git a/tests/zfs-tests/tests/functional/slog/Makefile.am b/tests/zfs-tests/tests/functional/slog/Makefile.am index 4548ce63b40c..33e3a6d3a496 100644 --- a/tests/zfs-tests/tests/functional/slog/Makefile.am +++ b/tests/zfs-tests/tests/functional/slog/Makefile.am @@ -17,7 +17,8 @@ dist_pkgdata_SCRIPTS = \ slog_013_pos.ksh \ slog_014_pos.ksh \ slog_015_neg.ksh \ - slog_replay_fs.ksh \ + slog_replay_fs_001.ksh \ + slog_replay_fs_002.ksh \ slog_replay_volume.ksh dist_pkgdata_DATA = \ diff --git a/tests/zfs-tests/tests/functional/slog/setup.ksh b/tests/zfs-tests/tests/functional/slog/setup.ksh index f30824d3ee90..8e8d214d823c 100755 --- a/tests/zfs-tests/tests/functional/slog/setup.ksh +++ b/tests/zfs-tests/tests/functional/slog/setup.ksh @@ -38,13 +38,4 @@ if ! verify_slog_support ; then log_unsupported "This system doesn't support separate intent logs" fi -if [[ -d $VDEV ]]; then - log_must rm -rf $VDIR -fi -if [[ -d $VDEV2 ]]; then - log_must rm -rf $VDIR2 -fi -log_must mkdir -p $VDIR $VDIR2 -log_must truncate -s $MINVDEVSIZE $VDEV $SDEV $LDEV $VDEV2 $SDEV2 $LDEV2 - log_pass diff --git a/tests/zfs-tests/tests/functional/slog/slog.kshlib b/tests/zfs-tests/tests/functional/slog/slog.kshlib index 6ed7e4e0502f..75cfec2d832d 100644 --- a/tests/zfs-tests/tests/functional/slog/slog.kshlib +++ b/tests/zfs-tests/tests/functional/slog/slog.kshlib @@ -31,11 +31,20 @@ . $STF_SUITE/include/libtest.shlib . $STF_SUITE/tests/functional/slog/slog.cfg +function setup +{ + log_must rm -rf $VDIR $VDIR2 + log_must mkdir -p $VDIR $VDIR2 + log_must truncate -s $MINVDEVSIZE $VDEV $SDEV $LDEV $VDEV2 $SDEV2 $LDEV2 + + return 0 +} + function cleanup { poolexists $TESTPOOL && destroy_pool $TESTPOOL poolexists $TESTPOOL2 && destroy_pool $TESTPOOL2 - rm -rf $TESTDIR + rm -rf $TESTDIR $VDIR $VDIR2 } # diff --git a/tests/zfs-tests/tests/functional/slog/slog_001_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_001_pos.ksh index 3d3daf5f9ccc..a4c35ed9e98e 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_001_pos.ksh @@ -45,6 +45,7 @@ verify_runnable "global" log_assert "Creating a pool with a log device succeeds." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_002_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_002_pos.ksh index b056f19cdb80..91904aa612d1 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_002_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Adding a log device to normal pool works." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_003_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_003_pos.ksh index c647b8f54b75..0b4d6ede3e13 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_003_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_003_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Adding an extra log device works." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_004_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_004_pos.ksh index 4b0b3439a2e3..10f28dcc000b 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_004_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Attaching a log device passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_005_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_005_pos.ksh index cbbb9486913a..4836f6f27937 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_005_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Detaching a log device passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_006_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_006_pos.ksh index 53e8c67ca005..24143196fd2e 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_006_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_006_pos.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Replacing a log device passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_007_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_007_pos.ksh index 4926fb7b3192..27ac38606c29 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_007_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_007_pos.ksh @@ -48,6 +48,7 @@ verify_runnable "global" log_assert "Exporting and importing pool with log devices passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_008_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_008_neg.ksh index 587e0e321222..54587a0c61a7 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_008_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_008_neg.ksh @@ -44,6 +44,7 @@ verify_runnable "global" log_assert "A raidz/raidz2 log is not supported." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_009_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_009_neg.ksh index e7091f17b759..222f71a99928 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_009_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_009_neg.ksh @@ -45,6 +45,7 @@ verify_runnable "global" log_assert "A raidz/raidz2 log can not be added to existed pool." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_010_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_010_neg.ksh index 8fe248ffbcba..edd9abea0930 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_010_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_010_neg.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Slog device can not be replaced with spare device." log_onexit cleanup +log_must setup log_must zpool create $TESTPOOL $VDEV spare $SDEV log $LDEV sdev=$(random_get $SDEV) diff --git a/tests/zfs-tests/tests/functional/slog/slog_011_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_011_neg.ksh index 2dad200b31c1..3bebc8201713 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_011_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_011_neg.ksh @@ -46,6 +46,7 @@ verify_runnable "global" log_assert "Offline and online a log device passes." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_012_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_012_neg.ksh index 45566d427f1d..8d6fb2bffb7f 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_012_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_012_neg.ksh @@ -45,6 +45,7 @@ verify_runnable "global" log_assert "Pool can survive when one of mirror log device get corrupted." log_onexit cleanup +log_must setup for type in "" "mirror" "raidz" "raidz2" do diff --git a/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh index bbe5adc24174..d6917065ddbf 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_013_pos.ksh @@ -60,6 +60,7 @@ log_assert "Verify slog device can be disk, file, lofi device or any device " \ "that presents a block interface." verify_disk_count "$DISKS" 2 log_onexit cleanup_testenv +log_must setup dsk1=${DISKS%% *} log_must zpool create $TESTPOOL ${DISKS#$dsk1} diff --git a/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh index 0ec96ae1e6f7..e8ea29f1ffa3 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_014_pos.ksh @@ -44,6 +44,7 @@ verify_runnable "global" log_assert "log device can survive when one of the pool device get corrupted." +log_must setup for type in "mirror" "raidz" "raidz2"; do for spare in "" "spare"; do diff --git a/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh index 37821888ea00..fa6105116574 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_015_neg.ksh @@ -47,6 +47,7 @@ function cleanup ORIG_TIMEOUT=$(get_tunable zfs_commit_timeout_pct | tail -1 | awk '{print $NF}') log_onexit cleanup +log_must setup for PCT in 0 1 2 4 8 16 32 64 128 256 512 1024; do log_must set_tunable64 zfs_commit_timeout_pct $PCT diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh similarity index 94% rename from tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh rename to tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh index 5f281a756f15..3e5bccd2ef18 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_fs.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh @@ -66,6 +66,7 @@ function cleanup_fs log_assert "Replay of intent log succeeds." log_onexit cleanup_fs +log_must setup # # 1. Create an empty file system (TESTFS) @@ -160,6 +161,14 @@ log_must attr -qs fileattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file log_must attr -qs tmpattr -V HelloWorld /$TESTPOOL/$TESTFS/xattr.file log_must attr -qr tmpattr /$TESTPOOL/$TESTFS/xattr.file +# TX_WRITE, TX_LINK, TX_REMOVE +# Make sure TX_REMOVE won't affect TX_WRITE if file is not destroyed +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/link_and_unlink bs=128k \ + count=8 +log_must ln /$TESTPOOL/$TESTFS/link_and_unlink \ + /$TESTPOOL/$TESTFS/link_and_unlink.link +log_must rm /$TESTPOOL/$TESTFS/link_and_unlink.link + # # 4. Copy TESTFS to temporary location (TESTDIR/copy) # diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh new file mode 100755 index 000000000000..3c3ccdf4ad23 --- /dev/null +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_002.ksh @@ -0,0 +1,137 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/tests/functional/slog/slog.kshlib + +# +# DESCRIPTION: +# Verify slog replay correctly when TX_REMOVEs are followed by +# TX_CREATEs. +# +# STRATEGY: +# 1. Create a file system (TESTFS) with a lot of files +# 2. Freeze TESTFS +# 3. Remove all files then create a lot of files +# 4. Copy TESTFS to temporary location (TESTDIR/copy) +# 5. Unmount filesystem +# +# 6. Remount TESTFS +# 7. Compare TESTFS against the TESTDIR/copy +# + +verify_runnable "global" + +function cleanup_fs +{ + cleanup +} + +log_assert "Replay of intent log succeeds." +log_onexit cleanup_fs +log_must setup + +# +# 1. Create a file system (TESTFS) with a lot of files +# +log_must zpool create $TESTPOOL $VDEV log mirror $LDEV +log_must zfs set compression=on $TESTPOOL +log_must zfs create $TESTPOOL/$TESTFS + +# Prep for the test of TX_REMOVE followed by TX_CREATE +dnsize=(legacy auto 1k 2k 4k 8k 16k) +NFILES=200 +log_must mkdir /$TESTPOOL/$TESTFS/dir0 +log_must eval 'for i in $(seq $NFILES); do zfs set dnodesize=${dnsize[$RANDOM % ${#dnsize[@]}]} $TESTPOOL/$TESTFS; touch /$TESTPOOL/$TESTFS/dir0/file.$i; done' + +# +# Reimport to reset dnode allocation pointer. +# This is to make sure we will have TX_REMOVE and TX_CREATE on same id +# +log_must zpool export $TESTPOOL +log_must zpool import -f -d $VDIR $TESTPOOL + +# +# This dd command works around an issue where ZIL records aren't created +# after freezing the pool unless a ZIL header already exists. Create a file +# synchronously to force ZFS to write one out. +# +log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/sync \ + conv=fdatasync,fsync bs=1 count=1 + +# +# 2. Freeze TESTFS +# +log_must zpool freeze $TESTPOOL + +# +# 3. Remove all files then create a lot of files +# +# TX_REMOVE followed by TX_CREATE +log_must eval 'rm -f /$TESTPOOL/$TESTFS/dir0/*' +log_must eval 'for i in $(seq $NFILES); do zfs set dnodesize=${dnsize[$RANDOM % ${#dnsize[@]}]} $TESTPOOL/$TESTFS; touch /$TESTPOOL/$TESTFS/dir0/file.$i; done' + +# +# 4. Copy TESTFS to temporary location (TESTDIR/copy) +# +log_must mkdir -p $TESTDIR/copy +log_must cp -a /$TESTPOOL/$TESTFS/* $TESTDIR/copy/ + +# +# 5. Unmount filesystem and export the pool +# +# At this stage TESTFS is empty again and frozen, the intent log contains +# a complete set of deltas to replay. +# +log_must zfs unmount /$TESTPOOL/$TESTFS + +log_note "Verify transactions to replay:" +log_must zdb -iv $TESTPOOL/$TESTFS + +log_must zpool export $TESTPOOL + +# +# 6. Remount TESTFS +# +# Import the pool to unfreeze it and claim log blocks. It has to be +# `zpool import -f` because we can't write a frozen pool's labels! +# +log_must zpool import -f -d $VDIR $TESTPOOL + +# +# 7. Compare TESTFS against the TESTDIR/copy +# +log_note "Verify current block usage:" +log_must zdb -bcv $TESTPOOL + +log_note "Verify number of files" +log_must test "$(ls /$TESTPOOL/$TESTFS/dir0 | wc -l)" -eq $NFILES + +log_note "Verify working set diff:" +log_must diff -r /$TESTPOOL/$TESTFS $TESTDIR/copy + +log_pass "Replay of intent log succeeds." diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh index 2cdcb38dc257..a72c83b5bfc6 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_volume.ksh @@ -76,6 +76,7 @@ function cleanup_volume log_assert "Replay of intent log succeeds." log_onexit cleanup_volume +log_must setup # # 1. Create an empty volume (TESTVOL), set sync=always, and format @@ -86,7 +87,7 @@ log_must zfs create -V 128M $TESTPOOL/$TESTVOL log_must zfs set compression=on $TESTPOOL/$TESTVOL log_must zfs set sync=always $TESTPOOL/$TESTVOL log_must mkdir -p $TESTDIR -log_must block_device_wait +block_device_wait echo "y" | newfs -t ext4 -v $VOLUME log_must mkdir -p $MNTPNT log_must mount -o discard $VOLUME $MNTPNT @@ -149,7 +150,7 @@ log_must zpool export $TESTPOOL # `zpool import -f` because we can't write a frozen pool's labels! # log_must zpool import -f $TESTPOOL -log_must block_device_wait +block_device_wait log_must mount $VOLUME $MNTPNT # diff --git a/tests/zfs-tests/tests/functional/snapshot/snapshot_009_pos.ksh b/tests/zfs-tests/tests/functional/snapshot/snapshot_009_pos.ksh index 6607d4ca4974..1ee7e33c2ac2 100755 --- a/tests/zfs-tests/tests/functional/snapshot/snapshot_009_pos.ksh +++ b/tests/zfs-tests/tests/functional/snapshot/snapshot_009_pos.ksh @@ -88,7 +88,7 @@ else fi log_must zfs snapshot -r $snappool -log_must block_device_wait +block_device_wait #verify the snapshot -r results for snap in $snappool $snapfs $snapvol $snapctr $snapctrvol \ diff --git a/tests/zfs-tests/tests/functional/snapshot/snapshot_010_pos.ksh b/tests/zfs-tests/tests/functional/snapshot/snapshot_010_pos.ksh index 0f876ad6d61e..128b443c6fc9 100755 --- a/tests/zfs-tests/tests/functional/snapshot/snapshot_010_pos.ksh +++ b/tests/zfs-tests/tests/functional/snapshot/snapshot_010_pos.ksh @@ -83,7 +83,7 @@ else fi log_must zfs snapshot -r $snappool -log_must block_device_wait +block_device_wait #select the $TESTCTR as destroy point, $TESTCTR is a child of $TESTPOOL log_must zfs destroy -r $snapctr @@ -92,7 +92,7 @@ for snap in $snapctr $snapctrvol $snapctrclone $snapctrfs; do log_fail "The snapshot $snap is not destroyed correctly." done -for snap in $snappool $snapfs $snapvol $ctrfs@$TESTSNAP1;do +for snap in $snappool $snapfs $snapvol $ctrfs@$TESTSNAP1; do ! snapexists $snap && \ log_fail "The snapshot $snap should be not destroyed." done diff --git a/tests/zfs-tests/tests/functional/suid/.gitignore b/tests/zfs-tests/tests/functional/suid/.gitignore new file mode 100644 index 000000000000..a9a3db79ba44 --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/.gitignore @@ -0,0 +1 @@ +/suid_write_to_file diff --git a/tests/zfs-tests/tests/functional/suid/Makefile.am b/tests/zfs-tests/tests/functional/suid/Makefile.am new file mode 100644 index 000000000000..594d2b77ca8e --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/Makefile.am @@ -0,0 +1,16 @@ +include $(top_srcdir)/config/Rules.am + +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/suid + +dist_pkgdata_SCRIPTS = \ + suid_write_to_suid.ksh \ + suid_write_to_sgid.ksh \ + suid_write_to_suid_sgid.ksh \ + suid_write_to_none.ksh \ + cleanup.ksh \ + setup.ksh + +pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/suid + +pkgexec_PROGRAMS = suid_write_to_file +suid_write_to_file_SOURCES = suid_write_to_file.c diff --git a/tests/zfs-tests/tests/functional/suid/cleanup.ksh b/tests/zfs-tests/tests/functional/suid/cleanup.ksh new file mode 100755 index 000000000000..6e41e02faf58 --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/cleanup.ksh @@ -0,0 +1,34 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/suid/setup.ksh b/tests/zfs-tests/tests/functional/suid/setup.ksh new file mode 100755 index 000000000000..d04d5568c003 --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/setup.ksh @@ -0,0 +1,35 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} +default_setup $DISK diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c b/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c new file mode 100644 index 000000000000..571dc553bec2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c @@ -0,0 +1,133 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include + +static void +test_stat_mode(mode_t extra) +{ + struct stat st; + int i, fd; + char fpath[1024]; + char *penv[] = {"TESTDIR", "TESTFILE0"}; + char buf[] = "test"; + mode_t res; + mode_t mode = 0777 | extra; + + /* + * Get the environment variable values. + */ + for (i = 0; i < sizeof (penv) / sizeof (char *); i++) { + if ((penv[i] = getenv(penv[i])) == NULL) { + fprintf(stderr, "getenv(penv[%d])\n", i); + exit(1); + } + } + + umask(0); + if (stat(penv[0], &st) == -1 && mkdir(penv[0], mode) == -1) { + perror("mkdir"); + exit(2); + } + + snprintf(fpath, sizeof (fpath), "%s/%s", penv[0], penv[1]); + unlink(fpath); + if (stat(fpath, &st) == 0) { + fprintf(stderr, "%s exists\n", fpath); + exit(3); + } + + fd = creat(fpath, mode); + if (fd == -1) { + perror("creat"); + exit(4); + } + close(fd); + + if (setuid(65534) == -1) { + perror("setuid"); + exit(5); + } + + fd = open(fpath, O_RDWR); + if (fd == -1) { + perror("open"); + exit(6); + } + + if (write(fd, buf, sizeof (buf)) == -1) { + perror("write"); + exit(7); + } + close(fd); + + if (stat(fpath, &st) == -1) { + perror("stat"); + exit(8); + } + unlink(fpath); + + /* Verify SUID/SGID are dropped */ + res = st.st_mode & (0777 | S_ISUID | S_ISGID); + if (res != (mode & 0777)) { + fprintf(stderr, "stat(2) %o\n", res); + exit(9); + } +} + +int +main(int argc, char *argv[]) +{ + const char *name; + mode_t extra; + + if (argc < 2) { + fprintf(stderr, "Invalid argc\n"); + exit(1); + } + + name = argv[1]; + if (strcmp(name, "SUID") == 0) { + extra = S_ISUID; + } else if (strcmp(name, "SGID") == 0) { + extra = S_ISGID; + } else if (strcmp(name, "SUID_SGID") == 0) { + extra = S_ISUID | S_ISGID; + } else if (strcmp(name, "NONE") == 0) { + extra = 0; + } else { + fprintf(stderr, "Invalid name %s\n", name); + exit(1); + } + + test_stat_mode(extra); + + return (0); +} diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh new file mode 100755 index 000000000000..dd01978619f9 --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh @@ -0,0 +1,52 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify write(2) to regular file by non-owner. +# Also see https://github.com/pjd/pjdfstest/blob/master/tests/chmod/12.t +# +# STRATEGY: +# 1. creat(2) a file. +# 2. write(2) to the file with uid=65534. +# 3. stat(2) the file and verify .st_mode value. +# + +verify_runnable "both" + +function cleanup +{ + rm -f $TESTDIR/$TESTFILE0 +} + +log_onexit cleanup +log_note "Verify write(2) to regular file by non-owner" + +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE" + +log_pass "Verify write(2) to regular file by non-owner passed" diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh new file mode 100755 index 000000000000..49ae2bd1b31e --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh @@ -0,0 +1,52 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify write(2) to SGID file by non-owner. +# Also see https://github.com/pjd/pjdfstest/blob/master/tests/chmod/12.t +# +# STRATEGY: +# 1. creat(2) a file with SGID. +# 2. write(2) to the file with uid=65534. +# 3. stat(2) the file and verify .st_mode value. +# + +verify_runnable "both" + +function cleanup +{ + rm -f $TESTDIR/$TESTFILE0 +} + +log_onexit cleanup +log_note "Verify write(2) to SGID file by non-owner" + +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID" + +log_pass "Verify write(2) to SGID file by non-owner passed" diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh new file mode 100755 index 000000000000..3983aad2e51d --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh @@ -0,0 +1,52 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify write(2) to SUID file by non-owner. +# Also see https://github.com/pjd/pjdfstest/blob/master/tests/chmod/12.t +# +# STRATEGY: +# 1. creat(2) a file with SUID. +# 2. write(2) to the file with uid=65534. +# 3. stat(2) the file and verify .st_mode value. +# + +verify_runnable "both" + +function cleanup +{ + rm -f $TESTDIR/$TESTFILE0 +} + +log_onexit cleanup +log_note "Verify write(2) to SUID file by non-owner" + +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID" + +log_pass "Verify write(2) to SUID file by non-owner passed" diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh new file mode 100755 index 000000000000..a058c7e7d4bc --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh @@ -0,0 +1,52 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 by Tomohiro Kusumi. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify write(2) to SUID/SGID file by non-owner. +# Also see https://github.com/pjd/pjdfstest/blob/master/tests/chmod/12.t +# +# STRATEGY: +# 1. creat(2) a file with SUID/SGID. +# 2. write(2) to the file with uid=65534. +# 3. stat(2) the file and verify .st_mode value. +# + +verify_runnable "both" + +function cleanup +{ + rm -f $TESTDIR/$TESTFILE0 +} + +log_onexit cleanup +log_note "Verify write(2) to SUID/SGID file by non-owner" + +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID" + +log_pass "Verify write(2) to SUID/SGID file by non-owner passed"